예제 #1
0
def build_updates(parameters, gradients, update_vars, initial_learning_rate,
                  momentum):
    """Return the momentum updates for ``parameters``.

    ``initial_learning_rate`` is first stored on
    ``update_vars._learning_rate``; the value read back from that attribute
    (rather than the raw argument) is forwarded as ``learning_rate``.
    ``momentum`` is forwarded as ``mu`` and the ``P`` found in the enclosing
    scope is forwarded as ``P``.
    """
    update_vars._learning_rate = initial_learning_rate
    # Collect the keyword arguments once so the final call stays short.
    momentum_options = dict(
        P=P,
        learning_rate=update_vars._learning_rate,
        mu=momentum
    )
    return updates.momentum(parameters, gradients, **momentum_options)
예제 #2
0
 def turing_updates(cost, lr):
     """Return momentum updates for every value in ``P``.

     The cost is augmented with ``1e-3`` times the sum of squared parameter
     entries, and the gradients of that total are passed through the
     callable returned by ``updates.clip(5.)`` before being handed to
     ``updates.momentum`` with ``mu=0`` and ``learning_rate=lr``.
     """
     weights = P.values()
     # L2 weight-decay term, accumulated one parameter at a time.
     penalty = T.sum(0)
     for weight in weights:
         penalty = penalty + (weight ** 2).sum()
     regularised_cost = cost + 1e-3 * penalty
     clip_gradients = updates.clip(5.)
     clipped = clip_gradients(T.grad(regularised_cost, wrt=weights))
     return updates.momentum(weights, clipped, mu=0, learning_rate=lr)
예제 #3
0
 def turing_updates(cost, lr):
     """Return momentum updates for every value in ``P``.

     Each gradient of ``cost`` with respect to the parameters is clipped
     element-wise to ``[-100, 100]`` and then passed to ``updates.momentum``
     with ``mu=0`` and ``learning_rate=lr``.

     NOTE: L2 weight decay (``cost + 1e-3 * sum of squared parameters``) is
     currently disabled in this variant; add that term to ``cost`` before
     taking the gradient to re-enable it.
     """
     params = P.values()
     grads = [T.clip(g, -100, 100) for g in T.grad(cost, wrt=params)]
     return updates.momentum(params, grads, mu=0, learning_rate=lr)
예제 #4
0
    def turing_updates(cost, lr):
        """Return momentum updates for every value in ``P``.

        The cost is augmented with ``1e-3`` times the sum of squared
        parameter entries (each partial sum cast to ``theano.config.floatX``).
        The gradients of that total go through the callable returned by
        ``updates.clip(5.)`` and are then handed to ``updates.momentum`` with
        ``mu=0`` and ``learning_rate=lr``.
        """
        weights = P.values()
        float_type = theano.config.floatX
        # L2 weight-decay term, kept in floatX at every step.
        decay = T.sum(0).astype(float_type)
        for weight in weights:
            decay = decay + (weight ** 2).sum().astype(float_type)
        total_cost = cost + 1e-3 * decay
        clip_gradients = updates.clip(5.)
        clipped = clip_gradients(T.grad(total_cost, wrt=weights))
        # Alternatives that were left disabled here: element-wise
        # T.clip(g, -5, 5) on each gradient, and updates.rmsprop in place of
        # updates.momentum.
        return updates.momentum(weights, clipped, mu=0, learning_rate=lr)
예제 #5
0
    # Gradient-accumulation training setup: `acc` adds one example's
    # gradients into shared buffers, `update` applies their mean through
    # updates.momentum and resets the buffers.
    probs = predict(X)
    # NOTE(review): alpha is not referenced anywhere in the visible lines.
    alpha = 0.5
    params = P.values()
    cost = ctc.cost(probs, Y)  #+ 1e-8 * sum(T.sum(T.sqr(w)) for w in params)
    gradients = T.grad(cost, wrt=params)

    # One zero-valued shared buffer per parameter (0 * the parameter value),
    # plus a float32 count of how many examples have been accumulated.
    gradient_acc = [theano.shared(0 * p.get_value()) for p in params]
    counter = theano.shared(np.float32(0.))
    # acc(X, Y): returns the cost; as side effects adds each gradient to its
    # buffer and increments the counter by one.
    acc = theano.function(inputs=[X, Y],
                          outputs=cost,
                          updates=[(a, a + g)
                                   for a, g in zip(gradient_acc, gradients)] +
                          [(counter, counter + np.float32(1.))])
    # update(): takes no inputs and returns nothing. Feeds buffer / counter
    # (the mean accumulated gradient) to updates.momentum, then zeroes every
    # buffer and the counter. The `+` concatenation assumes updates.momentum
    # returns a list -- TODO confirm.
    # NOTE(review): calling update() while counter is 0 divides by zero.
    update = theano.function(
            inputs=[],outputs=[],
            updates = updates.momentum(params,[ g / counter for g in gradient_acc ]) \
                    + [ (a, np.float32(0) * a) for a in gradient_acc ] \
                    + [ (counter,np.float32(0.)) ]
        )

    # test(X, Y): returns the columns of probs selected by Y.
    test = theano.function(inputs=[X, Y], outputs=probs[:, Y])

    # One training string per line of dictionary.txt, whitespace-stripped.
    # NOTE(review): the file handle is never explicitly closed.
    training_examples = [word.strip() for word in open('dictionary.txt')]
    import random
    # 1500 passes over the data, reshuffled before each pass.
    for _ in xrange(1500):
        random.shuffle(training_examples)
        for i, string in enumerate(training_examples):
            # Print the cost of this example while accumulating its gradient.
            print acc(font.imagify(string), label_seq(string))
            # Apply the averaged update whenever i is a multiple of 20; this
            # includes i == 0, i.e. right after the first example of a pass.
            if i % 20 == 0: update()
            if i % 100 == 0:
                hinton.plot(test(font.imagify("test"), label_seq("test")).T,
예제 #6
0
파일: ocr.py 프로젝트: Duum/theano-ctc
    # Gradient-accumulation training setup: `acc` adds one example's
    # gradients into shared buffers, `update` applies their mean through
    # updates.momentum and resets the buffers.
    params = P.values()
    cost = ctc.cost(probs, Y) #+ 1e-8 * sum(T.sum(T.sqr(w)) for w in params)
    gradients = T.grad(cost, wrt=params)

    # One zero-valued shared buffer per parameter (0 * the parameter value),
    # plus a float32 count of how many examples have been accumulated.
    gradient_acc = [ theano.shared(0 * p.get_value()) for p in params ]
    counter = theano.shared(np.float32(0.))
    # acc(X, Y): returns the cost; as side effects adds each gradient to its
    # buffer and increments the counter by one.
    acc = theano.function(
            inputs=[X, Y],
            outputs=cost,
            updates = [
                (a,a + g) for a,g in zip(gradient_acc,gradients)
            ] + [(counter,counter + np.float32(1.))]
        )
    # update(): takes no inputs and returns nothing. Feeds buffer / counter
    # (the mean accumulated gradient) to updates.momentum, then zeroes every
    # buffer and the counter. The `+` concatenation assumes updates.momentum
    # returns a list -- TODO confirm.
    # NOTE(review): calling update() while counter is 0 divides by zero.
    update = theano.function(
            inputs=[],outputs=[],
            updates = updates.momentum(params,[ g / counter for g in gradient_acc ]) \
                    + [ (a, np.float32(0) * a) for a in gradient_acc ] \
                    + [ (counter,np.float32(0.)) ]
        )

    # test(X, Y): returns the columns of probs selected by Y.
    test = theano.function(
            inputs=[X,Y],
            outputs=probs[:,Y]
        )

    # One training string per line of dictionary.txt, whitespace-stripped.
    # NOTE(review): the file handle is never explicitly closed.
    training_examples = [ word.strip() for word in open('dictionary.txt') ]
    import random
    # 1500 passes over the data, reshuffled before each pass.
    for _ in xrange(1500):
        random.shuffle(training_examples)
        for i,string in enumerate(training_examples):
            # Print the cost of this example while accumulating its gradient.
            # NOTE(review): no call to update() is visible in this excerpt.
            print acc(font.imagify(string),label_seq(string))
예제 #7
0
def build_updates(parameters, gradients, update_vars, initial_learning_rate,
                  momentum):
    # Builds the momentum update rules for `parameters`.
    #
    # The initial learning rate is stored on `update_vars` first; whatever
    # that attribute yields when read back is what the optimiser receives.
    update_vars._learning_rate = initial_learning_rate
    learning_rate = update_vars._learning_rate
    return updates.momentum(
        parameters,
        gradients,
        P=P,
        learning_rate=learning_rate,
        mu=momentum
    )
예제 #8
0
파일: ocr.py 프로젝트: zxsted/theano-ctc
    # Gradient-accumulation training setup: `acc` adds one example's
    # gradients into shared buffers, `update` applies their mean through
    # updates.momentum and resets the buffers.
    # NOTE(review): alpha is not referenced anywhere in the visible lines.
    alpha = 0.5
    params = P.values()
    cost = ctc.cost(probs, Y)  # + 1e-8 * sum(T.sum(T.sqr(w)) for w in params)
    gradients = T.grad(cost, wrt=params)

    # One zero-valued shared buffer per parameter (0 * the parameter value),
    # plus a float32 count of how many examples have been accumulated.
    gradient_acc = [theano.shared(0 * p.get_value()) for p in params]
    counter = theano.shared(np.float32(0.))
    # acc(X, Y): returns the cost; as side effects adds each gradient to its
    # buffer and increments the counter by one.
    acc = theano.function(inputs=[X, Y],
                          outputs=cost,
                          updates=[(a, a + g)
                                   for a, g in zip(gradient_acc, gradients)] +
                          [(counter, counter + np.float32(1.))])
    # update(): takes no inputs and returns nothing. Feeds buffer / counter
    # (the mean accumulated gradient) to updates.momentum, then zeroes every
    # buffer and the counter. The `+` concatenation assumes updates.momentum
    # returns a list -- TODO confirm.
    # NOTE(review): calling update() while counter is 0 divides by zero.
    update = theano.function(inputs=[],
                             outputs=[],
                             updates=updates.momentum(
                                 params,
                                 [g / counter for g in gradient_acc],
                             ) + [(a, np.float32(0) * a)
                                  for a in gradient_acc] +
                             [(counter, np.float32(0.))])

    # test(X, Y): returns the columns of probs selected by Y.
    test = theano.function(inputs=[X, Y], outputs=probs[:, Y])
    # One training string per line of dictionary.txt, whitespace-stripped.
    # NOTE(review): the file handle is never explicitly closed.
    training_examples = [word.strip() for word in open('dictionary.txt')]
    import random
    # 1500 passes over the data, reshuffled before each pass.
    for _ in xrange(1500):
        random.shuffle(training_examples)
        for i, string in enumerate(training_examples):
            # Print the cost of this example while accumulating its gradient.
            print acc(font.imagify(string), label_seq(string))
            # Apply the averaged update whenever i is a multiple of 20; this
            # includes i == 0, i.e. right after the first example of a pass.
            if i % 20 == 0:
                update()
            if i % 100 == 0:
                hinton.plot(test(font.imagify("test"), label_seq("test")).T,
예제 #9
0
    x = np.append(x, np.fliplr(x))
    y = np.append(y,y)
    return x,y

# Script entry point: loads and preprocesses the dataset, compiles the Theano
# prediction and training functions, and constructs the Trainer.
if __name__ == "__main__":
    print "Loading dataset..."
    x,y = datasets.transcription_factor()
    x,y   = preprocess(x,y)
    batches = make_batches(x, y)

    print "Compiling theano..."
    # Symbolic network input and target (fmatrix is Theano's float32 matrix).
    X = T.fmatrix('X')
    Y = T.fmatrix('Y')
    P = Parameters()

    # Y_hat is the symbolic output of the built model applied to X.
    net     = model.build(P)
    Y_hat   = net(X)
    # allow_input_downcast lets callers pass higher-precision arrays for X.
    predict = theano.function([X], Y_hat, allow_input_downcast=True)
    cost    = model.cost(P, Y_hat, Y)

    print "Calculating gradient..."
    params = P.values()
    grad   = T.grad(cost, wrt = params)
    # Cast every gradient to floatX.
    # NOTE(review): the original author was unsure why this cast is needed;
    # presumably it prevents a dtype mismatch in the updates -- confirm.
    grad   = [g.astype(theano.config.floatX) for g in grad]
    # train(X, Y): returns the cost and applies updates.momentum using that
    # helper's defaults (no learning rate or mu is passed here).
    train  = theano.function([X,Y], cost,
                updates=updates.momentum(params, grad),
                allow_input_downcast=True)
    trainer = Trainer(batches, train, predict)

    # The training run itself is currently disabled.
    # trainer()
예제 #10
0
                layer,
                reconstruct(
                    corrupt(layer),
                    W,b,b_rec,
                    input_layer = (layer.name == 'X')
                ),
                kl_divergence = (layer.name != 'X')
            )
        # Learning rate: 0.003 for the layer named 'X', 0.01 for all others.
        lr = 0.003 if layer.name == 'X'  else 0.01
        # Only W, b and b_rec are differentiated and updated here.
        parameters = [W,b,b_rec]
        gradients  = T.grad(loss,wrt=parameters)
        # Queue a trainer built by chunk.build_trainer that applies momentum
        # updates to those parameters; the `outputs=loss` argument is
        # commented out, so no output is requested from it.
        train_fns.append(
            chunk.build_trainer(
                inputs=[X],
#                outputs=loss,
                updates=updates.momentum(parameters,gradients,learning_rate=lr),
                mapping=shared_variables_mapping
            )
        )
        

        # Queue the matching validator: it outputs and monitors "loss" over
        # frame_data.validation_stream, with a callback built from P.
        validation_fns.append(
            validator.build(
                inputs=[X],
                outputs={"loss":loss},
                monitored_var="loss",
                validation_stream=frame_data.validation_stream,
                callback=build_validation_callback(P)
            )
        )
        logging.info("Done compiling for layer %s"%layer)
예제 #11
0
파일: ocr.py 프로젝트: SigmaQuan/theano-ctc
    # Gradient-accumulation training setup: `acc` adds one example's
    # gradients into shared buffers, `update` applies their mean through
    # updates.momentum and resets the buffers.
    cost = ctc.cost(probs, Y)  # + 1e-8 * sum(T.sum(T.sqr(w)) for w in params)
    gradients = T.grad(cost, wrt=params)

    # One zero-valued shared buffer per parameter (0 * the parameter value),
    # plus a float32 count of how many examples have been accumulated.
    gradient_acc = [theano.shared(0 * p.get_value()) for p in params]
    counter = theano.shared(np.float32(0.))
    # acc(X, Y): returns the cost; as side effects adds each gradient to its
    # buffer and increments the counter by one.
    acc = theano.function(
        inputs=[X, Y],
        outputs=cost,
        updates=[
            (a, a + g) for a, g in zip(gradient_acc, gradients)
        ] + [(counter, counter + np.float32(1.))]
    )
    # update(): takes no inputs and returns nothing. Feeds buffer / counter
    # (the mean accumulated gradient) to updates.momentum, then zeroes every
    # buffer and the counter. The `+` concatenation assumes updates.momentum
    # returns a list -- TODO confirm.
    # NOTE(review): calling update() while counter is 0 divides by zero.
    update = theano.function(
        inputs=[], outputs=[],
        updates=updates.momentum(
            params, [g / counter for g in gradient_acc],
        ) + [(a, np.float32(0) * a) for a in gradient_acc] + [(counter, np.float32(0.))]
    )

    # test(X, Y): returns the columns of probs selected by Y.
    test = theano.function(
        inputs=[X, Y],
        outputs=probs[:, Y]
    )
    # One training string per line of dictionary.txt, whitespace-stripped.
    # NOTE(review): the file handle is never explicitly closed.
    training_examples = [word.strip() for word in open('dictionary.txt')]
    import random
    # 1500 passes over the data, reshuffled before each pass.
    for _ in xrange(1500):
        random.shuffle(training_examples)
        for i, string in enumerate(training_examples):
            # Print the cost of this example while accumulating its gradient.
            print acc(font.imagify(string), label_seq(string))
            # Apply the averaged update whenever i is a multiple of 20; this
            # includes i == 0, i.e. right after the first example of a pass.
            if i % 20 == 0:
                update()
예제 #12
0
        # Loss between the layer and a reconstruction of its corrupted
        # version. input_layer is True only for the layer named 'X';
        # kl_divergence is True only for the other layers.
        # NOTE(review): presumably denoising-autoencoder pre-training --
        # confirm against cost/reconstruct/corrupt.
        loss = cost(layer,
                    reconstruct(corrupt(layer),
                                W,
                                b,
                                b_rec,
                                input_layer=(layer.name == 'X')),
                    kl_divergence=(layer.name != 'X'))
        # Learning rate: 0.003 for the layer named 'X', 0.01 for all others.
        lr = 0.003 if layer.name == 'X' else 0.01
        # Only W, b and b_rec are differentiated and updated here.
        parameters = [W, b, b_rec]
        gradients = T.grad(loss, wrt=parameters)
        # Queue a trainer built by chunk.build_trainer that applies momentum
        # updates to those parameters; the `outputs=loss` argument is
        # commented out, so no output is requested from it.
        train_fns.append(
            chunk.build_trainer(
                inputs=[X],
                #                outputs=loss,
                updates=updates.momentum(parameters,
                                         gradients,
                                         learning_rate=lr),
                mapping=shared_variables_mapping))

        # Queue the matching validator: it outputs and monitors "loss" over
        # frame_data.validation_stream, with a callback built from P.
        validation_fns.append(
            validator.build(inputs=[X],
                            outputs={"loss": loss},
                            monitored_var="loss",
                            validation_stream=frame_data.validation_stream,
                            callback=build_validation_callback(P)))
        logging.info("Done compiling for layer %s" % layer)

    # Save P once all functions are compiled, then walk the queued
    # (train_fn, validation_fn) pairs in the order they were appended.
    # NOTE(review): i indexes those pairs, which appear to be built one per
    # layer, so the "epoch" wording in the log message may be misleading.
    save(P)
    for i, (train_fn,
            validation_fn) in enumerate(zip(train_fns, validation_fns)):
        logging.info("Starting pre-training for epoch %d" % i)