Example #1
def prepare_functions(input_size, hidden_size, latent_size, step_count,
                      batch_size, train_X, valid_X):
    P = Parameters()
    encode_decode = model.build(P,
                                input_size=input_size,
                                hidden_size=hidden_size,
                                latent_size=latent_size)
    # Scale up the initial decoder input weights by a factor of 10.
    P.W_decoder_input_0.set_value(P.W_decoder_input_0.get_value() * 10)

    X = T.matrix('X')
    step_count = 10  # NOTE: overrides the step_count argument above
    parameters = P.values()

    cost_symbs = []
    for s in xrange(step_count):
        Z_means, Z_stds, alphas, \
            X_mean, log_pi_samples = encode_decode(X, step_count=s + 1)
        batch_recon_loss, log_p = model.recon_loss(X, X_mean, log_pi_samples)
        recon_loss = T.mean(batch_recon_loss, axis=0)
        reg_loss = T.mean(model.reg_loss(Z_means, Z_stds, alphas), axis=0)
        vlb = recon_loss + reg_loss
        corr = T.mean(T.eq(T.argmax(log_p, axis=0),
                           T.argmax(log_pi_samples, axis=0)),
                      axis=0)
        cost_symbs.append(vlb)

    avg_cost = sum(cost_symbs) / step_count
    cost = avg_cost + 1e-3 * sum(T.sum(T.sqr(w)) for w in parameters)

    gradients = updates.clip_deltas(T.grad(cost, wrt=parameters), 5)

    print "Updated parameters:"
    pprint(parameters)
    idx = T.iscalar('idx')

    train = theano.function(
        inputs=[idx],
        outputs=[
            vlb, recon_loss, reg_loss,
            T.max(T.argmax(log_pi_samples, axis=0)), corr
        ],
        updates=updates.adam(parameters, gradients, learning_rate=1e-4),
        givens={X: train_X[idx * batch_size:(idx + 1) * batch_size]})

    validate = theano.function(inputs=[], outputs=vlb, givens={X: valid_X})

    sample = theano.function(inputs=[],
                             outputs=[
                                 X, X_mean,
                                 T.argmax(log_pi_samples, axis=0),
                                 T.exp(log_pi_samples)
                             ],
                             givens={X: valid_X[:10]})

    return train, validate, sample
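A minimal sketch of how the returned functions might be driven; the layer sizes, batch count and epoch loop below are hypothetical, not part of the original:

train, validate, sample = prepare_functions(input_size=784, hidden_size=256,
                                            latent_size=32, step_count=10,
                                            batch_size=64,
                                            train_X=train_X, valid_X=valid_X)
n_batches = train_X.get_value(borrow=True).shape[0] // 64  # assumes train_X is a shared variable
for epoch in xrange(20):
    for i in xrange(n_batches):
        vlb_val, recon_val, reg_val, max_step, corr_val = train(i)
    print "epoch %d: validation VLB %.4f" % (epoch, validate())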
Example #2
    P = Parameters()
    iaf, masks = iaf_made_wn(P, L=8, num_units=64,
                             num_hids=1, nonl=T.nnet.elu,
                             cond_bias=False)
    zT, ss = iaf(z0, cond_bias=None)
    parameters = P.values()
    pprint(parameters)
    

    logp = U(zT)
    logq = - ss
    losses = logq - logp
    loss = losses.mean()
    gradients = updates.clip_deltas(T.grad(loss, wrt=parameters), 5)    
    P_train = Parameters()    
    fupdates = updates.adam(parameters, gradients,
                            learning_rate=1e-3, P=P_train)
    
    train = theano.function([z0],[loss,logq.mean(),logp.mean()],
                            updates=fupdates)
    samples = theano.function([z0],zT)
    
    gradoldold = 0 # for debugging
    gradold = 0
    print 'starting training'
    for i in range(50000):
        spl = np.random.randn(64,2).astype(floatX)
        outs = train(spl)
        l = outs[0]
        lq = outs[1]
        lp = outs[2]
        gs = outs[2:]
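The excerpt stops inside the training loop; a minimal sketch of how the remaining loop body might report progress and draw samples from the trained flow (the reporting interval and names below are assumptions, not from the original):

        if i % 1000 == 0:  # hypothetical reporting interval
            print 'iter %d: loss %.4f  logq %.4f  logp %.4f' % (i, l, lq, lp)
    flow_samples = samples(np.random.randn(64, 2).astype(floatX))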
Example #3
                     W * W_s / W_s_batch,
                     W * W_b / W_b_batch)

    output = f(X)
    test_output = f(X, test=True)
    soft_ams = ams(output, Y, W_hat)
    discrete_ams = ams(output > 0.5, Y, W_hat)
    parameters = P.values()
    gradients = T.grad(-soft_ams, wrt=parameters)

    idx = T.iscalar('idx')
    batch_size = T.iscalar('batch_size')
    train = theano.function(
        inputs=[idx, batch_size],
        outputs=[soft_ams, discrete_ams],
        updates=updates.adam(parameters, gradients, learning_rate=5e-4),
        givens={
            X: data_X[idx * batch_size: (idx + 1) * batch_size],
            W: data_W[idx * batch_size: (idx + 1) * batch_size],
            Y: data_Y[idx * batch_size: (idx + 1) * batch_size],
        }
    )

    test = theano.function(
        inputs=[X, W, Y],
        outputs=[ams(test_output > 0.5, Y, W_hat),
                 ams(test_output > 0.7, Y, W_hat),
                 ams(test_output > 0.9, Y, W_hat),
                 ams(test_output > 0.99, Y, W_hat)]
    )
    print "Compilation done."
Example #4
    print "Compiling functions...",
    # pretrain = theano.function(
    #     inputs=[idx],
    #     outputs=[recon_loss / (32 * 32)] + latent_kls,
    #     updates=updates.adam(
    #         parameters,
    #         T.grad(pretrain_loss, wrt=parameters),
    #         learning_rate=1e-3
    #     ),
    #     givens={X: chunk_X[idx * batch_size:(idx + 1) * batch_size]}
    # )

    train = theano.function(
        inputs=[idx],
        outputs=[recon_loss / (32 * 32)] + latent_kls,
        updates=(updates.adam(parameters, gradients, learning_rate=1e-4) +
                 [(beta_lin, beta_lin + 1)]),  # also increment the beta_lin counter each step
        givens={X: chunk_X[idx * batch_size:(idx + 1) * batch_size]})

    test = theano.function(inputs=[X],
                           outputs=[val_loss, recon_loss / (32 * 32)] +
                           latent_kls)
    show_betas = theano.function(inputs=[], outputs=betas)
    print "Done compilation."

    def data_stream():
        stream = data_io.stream_file("data/train2014.pkl.gz")
        stream = data_io.buffered_random(stream)
        stream = data_io.chunks((x[0] for x in stream),
                                buffer_items=chunk_size)
        stream = data_io.async(stream, queue_size=2)
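The excerpt ends inside data_stream; a minimal sketch of how the chunk stream and the compiled train function might be wired together, assuming data_stream returns the assembled stream and chunk_X is the shared variable used in the givens above (hypothetical, not from the original):

    for chunk in data_stream():
        chunk_X.set_value(chunk)
        for i in xrange(chunk.shape[0] // batch_size):
            losses = train(i)  # [per-pixel recon loss] + per-layer KLs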
Example #5
    [Z_prior_mean, Z_prior_std,
     Z_mean, Z_std, X_mean, X_std] = extract(X, l)

    parameters = P.values()
    batch_cost = model.cost(X, Z_prior_mean, Z_prior_std,
                            Z_mean, Z_std, X_mean, X_std, l)
    print "Calculating gradient..."
    print parameters
    batch_size = T.cast(X.shape[1], 'float32')

    gradients = T.grad(batch_cost, wrt=parameters)
    gradients = [g / batch_size for g in gradients]
    gradients = clip(5, parameters, gradients)

    P_learn = Parameters()
    updates = updates.adam(parameters, gradients,
                           learning_rate=0.00025, P=P_learn)
    updates = normalise_weights(updates)

    print "Compiling..."
    train = theano.function(
        inputs=[X, l],
        outputs=batch_cost,
        updates=updates,
    )
    test = theano.function(inputs=[X, l], outputs=batch_cost)


    print "Calculating mean variance..."
    rand_stream = data_io.random_select_stream(*[
        data_io.stream_file('data/train.%02d.pklgz' % i)
        for i in xrange(1, 20)
Example #6
    [Z_prior_mean, Z_prior_std, Z_mean, Z_std, X_mean, X_std] = extract(X, l)

    parameters = P.values()
    batch_cost = model.cost(X, Z_prior_mean, Z_prior_std, Z_mean, Z_std,
                            X_mean, X_std, l)
    print "Calculating gradient..."
    print parameters
    batch_size = T.cast(X.shape[1], 'float32')

    gradients = T.grad(batch_cost, wrt=parameters)
    gradients = [g / batch_size for g in gradients]
    gradients = clip(5, parameters, gradients)

    P_learn = Parameters()
    updates = updates.adam(parameters,
                           gradients,
                           learning_rate=0.00025,
                           P=P_learn)
    updates = normalise_weights(updates)

    print "Compiling..."
    train = theano.function(
        inputs=[X, l],
        outputs=batch_cost,
        updates=updates,
    )
    test = theano.function(inputs=[X, l], outputs=batch_cost)

    print "Calculating mean variance..."
    rand_stream = data_io.random_select_stream(*[
        data_io.stream_file('data/train.%02d.pklgz' % i)
        for i in xrange(1, 20)
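Both excerpts stop while assembling the random training stream; a minimal sketch of how the compiled functions might then be driven, assuming each stream item is an (X, l) pair shaped as the model expects (hypothetical, not from the original):

    for X_batch, l_batch in rand_stream:  # assumed iteration protocol
        print train(X_batch, l_batch)     # batch cost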