Example 1
# Imports needed by this snippet; StackModel, PolicyDropoutLayer,
# InputSparseHiddenLayer, shared, reinforce_no_baseline and gradient_descent
# are project-local helpers defined elsewhere in the same codebase.
import os
import uuid

import theano
import theano.tensor as T

def build_model(new_model=True):
    momentum_epsilon = 0.9  # logged but unused in this version (see the commented-out argument below)

    block_size = 64            # units per block
    nblocks = [8,8,8]          # blocks per hidden layer (layer width = block_size * nblocks[i])
    rate = [1/4.,1/32.,1/32.]  # target mean block-activation probability, per layer
    L2reg = 0.05               # L2 weight-decay coefficient

    lambda_b = [80,40,40]            # weight of the rate penalties, per layer
    lambda_v = [10,20,20]            # weight of the variance bonuses, per layer
    learning_rates = [0.01,0.5,0.5]  # per-layer multiplier on the REINFORCE step size


    do_dropout = False  # not used below

    print locals()

    hyperparams = locals()  # snapshot of the settings above, dumped to params.txt

    if new_model:
        # Save a copy of this script and the hyperparameters into a fresh
        # experiment directory named by a random UUID.
        expid = str(uuid.uuid4())
        code = open(os.path.abspath(__file__), 'r').read()
        os.mkdir(expid)
        os.chdir(expid)
        open('code.py', 'w').write(code)

        print expid

        with open("params.txt", 'w') as f:
            for name, value in hyperparams.items():
                f.write("%s:%s\n" % (name, str(value)))


    # Lists that collect the model's shared parameters as the layers below are
    # built; shared.bind presumably registers the list that newly created
    # shared variables get appended to ("reinforce" tags the dropout-policy
    # parameters separately).
    params = []
    reinforce_params = []
    shared.bind(reinforce_params, "reinforce")
    shared.bind(params)

    rect = lambda x: T.maximum(0, x)  # unused; tanh is the activation actually used
    act = T.tanh

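    # Three policy-dropout hidden layers of block_size * nblocks[i] units each,
    # followed by a block-sparse 10-way softmax output layer.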
    model = StackModel([PolicyDropoutLayer(32*32*3, block_size*nblocks[0],
                                           block_size, act),
                        PolicyDropoutLayer(block_size*nblocks[0], block_size*nblocks[1],
                                           block_size, act),
                        PolicyDropoutLayer(block_size*nblocks[1], block_size*nblocks[2],
                                           block_size, act),
                        InputSparseHiddenLayer(block_size*nblocks[-1], 10, T.nnet.softmax,
                                               block_size=block_size)])


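    # Symbolic inputs: a minibatch of flattened 32x32x3 images, integer class
    # labels, and a scalar learning rate.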
    x = T.matrix()
    y = T.ivector()
    lr = T.scalar()

    y_hat, = model(x)
    loss = T.nnet.categorical_crossentropy(y_hat, y)
    cost = T.sum(loss)
    l2 = lambda x:sum([T.sum(i**2) for i in x])
    updates = []
    all_probs = []
    asymmetric_distance = lambda x: T.minimum(0, x) * -0.1 + T.maximum(0, x)
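    # One REINFORCE update per policy-dropout layer; the final softmax layer
    # is excluded here and trained by plain backprop below.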
    for i in range(len(model.layers)-1):
        probs = model.layers[i].probs
        sample_probs = model.layers[i].sample_probs
        layer_params = [model.layers[i].d.W, model.layers[i].d.b]
        all_probs.append(probs)

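        # Penalize deviation of the mean activation probability from the target
        # rate, per unit (axis=0) and, more weakly, per example (axis=1). The
        # variance terms are subtracted from the cost below, so variance in the
        # probabilities is rewarded rather than penalized.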
        l2_batchwise = lambda_b[i] * T.sum(abs(T.mean(probs, axis=0) - rate[i])**2)
        l2_exawise   = lambda_b[i] * 0.001*T.sum(abs(T.mean(probs, axis=1) - rate[i])**2)
        batch_var    = lambda_v[i] * T.sum(T.var(probs, axis=0))
        batch_var    += lambda_v[i] * 0.1*T.sum(T.var(probs, axis=1))

        #l2_batchwise = lambda_b[i] * T.sum(asymmetric_distance(T.mean(probs, axis=0) - rate[i]))
        #l2_exawise = lambda_b[i] * 0.001*T.sum(asymmetric_distance(T.mean(probs, axis=1) - rate[i]))
        #batch_var   = lambda_v[i] * T.sum(T.mean(asymmetric_distance(rate-probs), axis=0))
        #batch_var =  T.sum(T.mean((probs-(1-rate))**2, axis=0))
        regularising_cost = l2_batchwise + l2_exawise - batch_var + L2reg * l2(layer_params)
        updates += reinforce_no_baseline(layer_params, sample_probs,
                                         loss - loss.min(),  # per-example cost shifted by its batch minimum (momentum_epsilon no longer passed)
                                         lr * learning_rates[i],
                                         regularising_cost)

    error = T.sum(T.neq(y_hat.argmax(axis=1), y))
    nn_regularization = L2reg * l2(params)

    grads = T.grad(cost + nn_regularization, params)
    updates += gradient_descent(params, grads, lr)
    print params, reinforce_params

    learn = theano.function([x, y, lr],
                            [cost, error, T.concatenate(all_probs, axis=1)],
                            updates=updates, allow_input_downcast=True)
    test = theano.function([x, y], [cost, error], allow_input_downcast=True)

    return model, learn, test
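
A minimal smoke test for this snippet, assuming Python 2 with Theano and numpy
installed; the random CIFAR-10-shaped minibatch and the learning rate of 0.01
are illustrative placeholders, not part of the original code:

import numpy as np

# Hypothetical driver; new_model=False skips the experiment-directory setup.
model, learn, test = build_model(new_model=False)

# Random stand-in for a CIFAR-10 minibatch: 128 flattened 32x32x3 images.
X = np.random.rand(128, 32 * 32 * 3).astype('float32')
y = np.random.randint(0, 10, size=128).astype('int32')

cost, err, probs = learn(X, y, 0.01)  # one combined REINFORCE + SGD step
print cost, err, probs.shape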
Example 2
# Imports needed by this snippet; StackModel, HiddenLayer, Dropout, shared and
# gradient_descent are project-local helpers defined elsewhere in the same codebase.
import os
import uuid

import theano
import theano.tensor as T

def build_model(new_model=True):
    momentum_epsilon = 0.9  # logged but unused in this version

    nhidden = [64, 64]  # hidden-layer widths
    L2reg = 0.001       # L2 weight-decay coefficient

    vanilla = True      # not used below; only recorded in params.txt

    hyperparams = locals()  # snapshot of the settings above, dumped to params.txt

    if new_model:
        # Save a copy of this script and the hyperparameters into a fresh
        # experiment directory named by a random UUID.
        expid = str(uuid.uuid4())
        code = open(os.path.abspath(__file__), 'r').read()
        os.mkdir(expid)
        os.chdir(expid)
        open('code.py', 'w').write(code)

        print expid

        with open("params.txt", 'w') as f:
            for name, value in hyperparams.items():
                f.write("%s:%s\n" % (name, str(value)))

    # List that collects the model's shared parameters as the layers are built
    # (shared.bind presumably registers it as the append target).
    params = []
    shared.bind(params)

    rect = lambda x: T.maximum(0, x)  # unused; tanh is the activation actually used
    act = T.tanh

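    # Plain MLP baseline: two tanh hidden layers with standard dropout,
    # then a 10-way softmax output.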
    model = StackModel([
        HiddenLayer(32 * 32 * 3, nhidden[0], act),
        Dropout(),
        HiddenLayer(nhidden[0], nhidden[1], act),
        Dropout(),
        HiddenLayer(nhidden[-1], 10, T.nnet.softmax)
    ])

    x = T.matrix()
    y = T.ivector()
    lr = T.scalar()

    y_hat, = model(x)
    loss = T.nnet.categorical_crossentropy(y_hat, y)
    cost = T.sum(loss)
    l2 = lambda x: sum([T.sum(i**2) for i in x])
    updates = []
    error = T.sum(T.neq(y_hat.argmax(axis=1), y))
    nn_regularization = L2reg * l2(params)

    grads = T.grad(cost + nn_regularization, params)
    updates += gradient_descent(params, grads, lr)

    learn = theano.function([x, y, lr], [cost, error],
                            updates=updates,
                            allow_input_downcast=True)
    test = theano.function([x, y], [cost, error], allow_input_downcast=True)

    return model, learn, test
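
The same hypothetical smoke test shown after Example 1 applies here unchanged,
except that learn returns only [cost, error]: this vanilla model has no
dropout-policy probabilities to monitor.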