def build_model(new_model=True):
    """Build the policy-dropout network and compile its Theano functions.

    The network is a ``StackModel`` of three ``PolicyDropoutLayer`` blocks
    followed by an ``InputSparseHiddenLayer`` with a softmax output of
    size 10.  The input width is ``32*32*3``.

    Two sets of updates are combined in the ``learn`` function:

    * one ``reinforce_no_baseline`` update per policy layer, acting on that
      layer's ``d.W`` / ``d.b`` with its own learning-rate multiplier and a
      regularising cost built from the layer's ``probs``;
    * a ``gradient_descent`` update on ``params`` using the gradient of the
      summed cross-entropy plus an L2 penalty.

    Args:
        new_model: when true, a directory named after a fresh UUID is
            created, the process changes into it (``os.chdir``), and a copy
            of this source file (``code.py``) and the hyperparameters
            (``params.txt``) are written there.

    Returns:
        A tuple ``(model, learn, test)``:
        ``learn(x, y, lr)`` returns ``[cost, error, probs]`` (``probs`` is
        the policy layers' ``probs`` concatenated on axis 1) and applies
        the updates; ``test(x, y)`` returns ``[cost, error]``.
    """
    # Hyperparameters.  Every name assigned before the locals() snapshot is
    # written to params.txt, including ones this function does not read
    # afterwards (momentum_epsilon, do_dropout).
    momentum_epsilon = 0.9
    block_size = 64
    nblocks = [8, 8, 8]
    rate = [1/4., 1/32., 1/32.]
    L2reg = 0.05
    lambda_b = [80, 40, 40]
    lambda_v = [10, 20, 20]
    learning_rates = [0.01, 0.5, 0.5]
    do_dropout = False
    print(locals())
    # Copy the mapping: locals() may hand back a live dict that is refreshed
    # later, and we iterate over it while new locals are being created.
    hyperparams = dict(locals())

    # NOTE(review): the experiment-directory setup through the params.txt
    # dump is assumed to be the whole body of this branch -- confirm.
    if new_model:
        import os
        expid = str(uuid.uuid4())
        # Read the source before chdir so a relative __file__ still resolves.
        with open(os.path.abspath(__file__), 'r') as source_file:
            code = source_file.read()
        os.mkdir(expid)
        os.chdir(expid)
        with open('code.py', 'w') as code_copy:
            code_copy.write(code)
        print(expid)
        with open("params.txt", 'w') as params_file:
            for name in hyperparams:
                params_file.write("%s:%s\n" % (name, str(hyperparams[name])))

    # Both lists are handed to shared.bind before the layers are built and
    # are read afterwards; presumably bind makes the layers register their
    # shared variables in them -- confirm against the shared module.
    params = []
    reinforce_params = []
    shared.bind(reinforce_params, "reinforce")
    shared.bind(params)

    act = T.tanh
    model = StackModel([
        PolicyDropoutLayer(32*32*3, block_size*nblocks[0], block_size, act),
        PolicyDropoutLayer(block_size*nblocks[0], block_size*nblocks[1],
                           block_size, act),
        PolicyDropoutLayer(block_size*nblocks[1], block_size*nblocks[2],
                           block_size, act),
        InputSparseHiddenLayer(block_size*nblocks[-1], 10, T.nnet.softmax,
                               block_size=block_size),
    ])

    x = T.matrix()
    y = T.ivector()
    lr = T.scalar()

    y_hat, = model(x)
    loss = T.nnet.categorical_crossentropy(y_hat, y)
    cost = T.sum(loss)

    def l2(tensors):
        # Sum of squared entries over every tensor in the list.
        return sum([T.sum(t**2) for t in tensors])

    updates = []
    all_probs = []

    # Only referenced by the disabled regulariser variants inside the loop.
    assymetric_distance = lambda x: T.minimum(0, x) * -0.1 + T.maximum(0, x)

    # Every layer except the last (the softmax output) gets a policy update.
    for i in range(len(model.layers) - 1):
        layer = model.layers[i]
        probs = layer.probs
        sample_probs = layer.sample_probs
        layer_params = [layer.d.W, layer.d.b]
        all_probs.append(probs)

        # Squared distance of the mean probability from the target rate,
        # taken along each axis of probs (presumably axis 0 is the batch).
        l2_batchwise = lambda_b[i] * T.sum(
            abs(T.mean(probs, axis=0) - rate[i])**2)
        l2_exawise = lambda_b[i] * 0.001 * T.sum(
            abs(T.mean(probs, axis=1) - rate[i])**2)
        # Variance terms; they enter regularising_cost with a minus sign.
        batch_var = lambda_v[i] * T.sum(T.var(probs, axis=0))
        batch_var += lambda_v[i] * 0.1 * T.sum(T.var(probs, axis=1))
        # Disabled alternatives:
        #l2_batchwise = lambda_b[i] * T.sum(assymetric_distance(T.mean(probs, axis=0) - rate[i]))
        #l2_exawise = lambda_b[i] * 0.001*T.sum(assymetric_distance(T.mean(probs, axis=1) - rate[i]))
        #batch_var = lambda_v[i] * T.sum(T.mean(assymetric_distance(rate-probs), axis=0))
        #batch_var = T.sum(T.mean((probs-(1-rate))**2, axis=0))
        regularising_cost = (l2_batchwise + l2_exawise - batch_var
                             + L2reg * l2(layer_params))
        updates += reinforce_no_baseline(layer_params, sample_probs,
                                         loss - loss.min(),  # momentum_epsilon,
                                         lr * learning_rates[i],
                                         regularising_cost)

    # Number of examples whose arg-max prediction differs from the label.
    error = T.sum(T.neq(y_hat.argmax(axis=1), y))
    nn_regularization = L2reg * l2(params)
    grads = T.grad(cost + nn_regularization, params)
    updates += gradient_descent(params, grads, lr)
    # Same text as the statement form ``print params, reinforce_params``.
    print("%s %s" % (params, reinforce_params))

    learn = theano.function([x, y, lr],
                            [cost, error, T.concatenate(all_probs, axis=1)],
                            updates=updates, allow_input_downcast=True)
    test = theano.function([x, y], [cost, error],
                           allow_input_downcast=True)

    return model, learn, test
def build_model(new_model=True):
    """Build the dense baseline network and compile its Theano functions.

    The network is a ``StackModel`` of two tanh ``HiddenLayer`` blocks, each
    followed by ``Dropout()``, and a softmax ``HiddenLayer`` of size 10.
    The input width is ``32 * 32 * 3``.  Training applies
    ``gradient_descent`` to ``params`` using the gradient of the summed
    cross-entropy plus an L2 penalty.

    Args:
        new_model: when true, a directory named after a fresh UUID is
            created, the process changes into it (``os.chdir``), and a copy
            of this source file (``code.py``) and the hyperparameters
            (``params.txt``) are written there.

    Returns:
        A tuple ``(model, learn, test)``: ``learn(x, y, lr)`` returns
        ``[cost, error]`` and applies the updates; ``test(x, y)`` returns
        ``[cost, error]``.
    """
    # Hyperparameters.  Every name assigned before the locals() snapshot is
    # written to params.txt, including ones this function does not read
    # afterwards (momentum_epsilon, vanilla).
    momentum_epsilon = 0.9
    nhidden = [64, 64]
    L2reg = 0.001
    vanilla = True
    # Copy the mapping: locals() may hand back a live dict that is refreshed
    # later, and we iterate over it while new locals are being created.
    hyperparams = dict(locals())

    # NOTE(review): the experiment-directory setup through the params.txt
    # dump is assumed to be the whole body of this branch -- confirm.
    if new_model:
        import os
        expid = str(uuid.uuid4())
        # Read the source before chdir so a relative __file__ still resolves.
        with open(os.path.abspath(__file__), 'r') as source_file:
            code = source_file.read()
        os.mkdir(expid)
        os.chdir(expid)
        with open('code.py', 'w') as code_copy:
            code_copy.write(code)
        print(expid)
        with open("params.txt", 'w') as params_file:
            for name in hyperparams:
                params_file.write("%s:%s\n" % (name, str(hyperparams[name])))

    # The list is handed to shared.bind before the layers are built and is
    # read afterwards; presumably bind makes the layers register their
    # shared variables in it -- confirm against the shared module.
    params = []
    shared.bind(params)

    act = T.tanh
    model = StackModel([
        HiddenLayer(32 * 32 * 3, nhidden[0], act),
        Dropout(),
        HiddenLayer(nhidden[0], nhidden[1], act),
        Dropout(),
        HiddenLayer(nhidden[-1], 10, T.nnet.softmax),
    ])

    x = T.matrix()
    y = T.ivector()
    lr = T.scalar()

    y_hat, = model(x)
    loss = T.nnet.categorical_crossentropy(y_hat, y)
    cost = T.sum(loss)

    def l2(tensors):
        # Sum of squared entries over every tensor in the list.
        return sum([T.sum(t**2) for t in tensors])

    updates = []

    # Number of examples whose arg-max prediction differs from the label.
    error = T.sum(T.neq(y_hat.argmax(axis=1), y))
    nn_regularization = L2reg * l2(params)
    grads = T.grad(cost + nn_regularization, params)
    updates += gradient_descent(params, grads, lr)

    learn = theano.function([x, y, lr], [cost, error],
                            updates=updates, allow_input_downcast=True)
    test = theano.function([x, y], [cost, error],
                           allow_input_downcast=True)

    return model, learn, test
def build_model(new_model=True):
    """Assemble the two-hidden-layer dropout MLP and its Theano functions.

    Layers, in order: ``HiddenLayer(32*32*3 -> nhidden[0], tanh)``,
    ``Dropout()``, ``HiddenLayer(nhidden[0] -> nhidden[1], tanh)``,
    ``Dropout()``, ``HiddenLayer(nhidden[-1] -> 10, softmax)``.  The
    training objective is the summed categorical cross-entropy plus
    ``L2reg`` times the sum of squares of everything in ``params``.

    Args:
        new_model: if true, start a new experiment: make a directory named
            by a random UUID, ``os.chdir`` into it, and save this source
            file as ``code.py`` and the hyperparameters as ``params.txt``.

    Returns:
        ``(model, learn, test)`` where ``learn(x, y, lr)`` applies the
        gradient-descent updates and returns ``[cost, error]``, and
        ``test(x, y)`` returns ``[cost, error]``.
    """
    # Names defined up to the locals() call are what params.txt records;
    # momentum_epsilon and vanilla are kept only for that record.
    momentum_epsilon = 0.9
    nhidden = [64, 64]
    L2reg = 0.001
    vanilla = True
    # Take a copy so the recorded set cannot change while it is iterated
    # (locals() may return a live mapping).
    hyperparams = dict(locals())

    # NOTE(review): everything up to the end of the params.txt dump is
    # assumed to sit inside this branch -- confirm.
    if new_model:
        import os
        expid = str(uuid.uuid4())
        # Resolve and read this file before the working directory changes.
        with open(os.path.abspath(__file__), 'r') as this_file:
            code = this_file.read()
        os.mkdir(expid)
        os.chdir(expid)
        with open('code.py', 'w') as snapshot:
            snapshot.write(code)
        print(expid)
        with open("params.txt", 'w') as record:
            for key in hyperparams:
                record.write("%s:%s\n" % (key, str(hyperparams[key])))

    # params starts empty and is differentiated against below, so
    # shared.bind presumably causes layer construction to fill it -- confirm.
    params = []
    shared.bind(params)

    act = T.tanh
    layers = [
        HiddenLayer(32*32*3, nhidden[0], act),
        Dropout(),
        HiddenLayer(nhidden[0], nhidden[1], act),
        Dropout(),
        HiddenLayer(nhidden[-1], 10, T.nnet.softmax),
    ]
    model = StackModel(layers)

    # Symbolic inputs: data matrix, integer label vector, learning rate.
    x = T.matrix()
    y = T.ivector()
    lr = T.scalar()

    y_hat, = model(x)
    loss = T.nnet.categorical_crossentropy(y_hat, y)
    cost = T.sum(loss)

    def l2(tensors):
        # Total of squared entries across the given tensors.
        return sum([T.sum(t**2) for t in tensors])

    # Count of rows whose most probable class is not the label.
    error = T.sum(T.neq(y_hat.argmax(axis=1), y))

    nn_regularization = L2reg * l2(params)
    grads = T.grad(cost + nn_regularization, params)
    updates = []
    updates += gradient_descent(params, grads, lr)

    learn = theano.function([x, y, lr], [cost, error],
                            updates=updates, allow_input_downcast=True)
    test = theano.function([x, y], [cost, error],
                           allow_input_downcast=True)

    return model, learn, test