# Conditional-computation ("policy dropout") layers in Theano, Python 2.
# HiddenLayer, StackModel, Dropout, sparse_dot, gradient_descent and the
# `shared` parameter registry are helpers defined elsewhere in this
# codebase. The imports and the module-level random stream below are
# assumptions added here so the file is self-contained.
import uuid

import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(seed=1234)  # assumed shared random stream
class PolicyDropoutLayer:
    """Hidden layer whose blocks of units are kept or dropped by a
    learned policy (the `d` sub-layer), trained with REINFORCE."""

    def __init__(self, n_in, n_out, block_size, activation, do_dropout=False,
                 reinforce_params="reinforce", default_params="default"):
        assert n_out % block_size == 0
        self.block_size = block_size
        self.nblocks = n_out // block_size
        self.do_dropout = do_dropout
        # Main hidden layer: its parameters stay in the currently bound
        # (default, backprop-trained) group.
        self.h = HiddenLayer(n_in, n_out, activation)
        # Policy network: one sigmoid keep-probability per block; its
        # parameters are collected in the REINFORCE group, then the
        # default group is restored.
        shared.bind(reinforce_params)
        self.d = HiddenLayer(n_in, self.nblocks, T.nnet.sigmoid)
        shared.bind(default_params)

    def __call__(self, x, xmask=None):
        # Squash the sigmoid outputs into [0.01, 0.99] so no block is
        # ever kept or dropped with probability exactly 0 or 1.
        probs = self.d(x) * 0.98 + 0.01
        # Sample one binary keep/drop bit per (example, block).
        mask = srng.uniform(probs.shape) < probs
        mask.name = "mask!"
        # Sparse computation: only the kept blocks of W are multiplied.
        masked = self.h.activation(
            sparse_dot(x, xmask, self.h.W, mask, self.h.b, self.block_size))
        if False:
            # Equivalent dense computation in pure Theano, kept for reference.
            h = self.h(x)
            if self.do_dropout:
                h = h * (srng.uniform(h.shape) < 0.5)
            h_r = h.reshape([h.shape[0], self.nblocks, self.block_size])
            masked = h_r * mask.dimshuffle(0, 1, 'x')
            masked = masked.reshape(h.shape)
        # Probability of the sampled mask under the policy (product of
        # per-block Bernoulli likelihoods), needed for REINFORCE.
        self.sample_probs = T.prod(mask * probs + (1 - probs) * (1 - mask),
                                   axis=1)
        self.probs = probs
        return masked, mask
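# --- Illustration (our addition, not from the original repo): one way
# `sample_probs` can feed a REINFORCE surrogate cost. The helper name
# `reinforce_cost`, the `baseline` argument and the reward definition
# are assumptions; the repo's actual objective may differ. A natural
# `per_example_loss` is the per-example categorical cross-entropy
# computed in build_model below.
def reinforce_cost(layer, per_example_loss, baseline=0.0):
    # Lower loss => higher reward; disconnected_grad stops the reward
    # itself from being backpropagated through the main network.
    reward = -theano.gradient.disconnected_grad(per_example_loss)
    # Policy-gradient surrogate: -mean(log p(mask) * (reward - baseline));
    # its gradient w.r.t. the policy parameters pushes the policy toward
    # masks that led to low loss.
    return -T.mean(T.log(layer.sample_probs) * (reward - baseline))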
class RandomBlockDropoutLayer:
    """Baseline: blocks are kept with a fixed probability `rate` instead
    of a learned policy. (In the original source this variant also
    carried the name PolicyDropoutLayer; it is renamed here so both
    versions can coexist in one module.)"""

    def __init__(self, n_in, n_out, block_size, activation, rate,
                 do_dropout=False):
        assert n_out % block_size == 0
        self.rate = rate
        self.block_size = block_size
        self.nblocks = n_out // block_size
        self.do_dropout = do_dropout
        self.h = HiddenLayer(n_in, n_out, activation)

    def __call__(self, x, xmask=None):
        # Keep each (example, block) independently with probability rate.
        mask = srng.uniform((x.shape[0], self.nblocks)) < self.rate
        masked = self.h.activation(
            sparse_dot(x, xmask, self.h.W, mask, self.h.b, self.block_size))
        return masked, mask
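# --- Standalone sanity check (our addition; pure Theano + NumPy, no
# repo helpers) of the block-masking arithmetic both layers rely on:
# zeroing block j of an activation vector is the same as multiplying
# each of its block_size units by mask[:, j]. Shapes (4 blocks of 3
# units) are arbitrary.
import numpy as np

_x = T.matrix()
_blockmask = T.matrix()  # (batch, nblocks) of 0/1 keep bits
_h3 = _x.reshape([_x.shape[0], 4, 3]) * _blockmask.dimshuffle(0, 1, 'x')
_check = theano.function([_x, _blockmask], _h3.reshape(_x.shape),
                         allow_input_downcast=True)
# Kept blocks pass through unchanged, dropped blocks are zeroed:
# print _check(np.ones((2, 12)), np.array([[1, 0, 1, 0], [0, 1, 0, 1]]))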
def build_model(new_model=True):
    momentum_epsilon = 0.9  # recorded as a hyperparameter below; not passed to gradient_descent
    nhidden = [64, 64]
    L2reg = 0.001
    vanilla = True
    hyperparams = locals()  # snapshot of the hyperparameters defined so far
    if new_model:
        # Give every run its own directory and snapshot the code and
        # hyperparameters so the experiment can be reproduced later.
        expid = str(uuid.uuid4())
        import os
        import os.path
        code = file(os.path.abspath(__file__), 'r').read()
        os.mkdir(expid)
        os.chdir(expid)
        file('code.py', 'w').write(code)
        print expid
        f = file("params.txt", 'w')
        for i in hyperparams:
            f.write("%s:%s\n" % (i, str(hyperparams[i])))
        f.close()

    # Collect all parameters created below into `params`.
    params = []
    shared.bind(params)

    rect = lambda x: T.maximum(0, x)
    act = T.tanh
    # Two tanh hidden layers with plain dropout, softmax output; the
    # input size matches flattened 32x32 RGB images (CIFAR-10).
    model = StackModel([
        HiddenLayer(32 * 32 * 3, nhidden[0], act),
        Dropout(),
        HiddenLayer(nhidden[0], nhidden[1], act),
        Dropout(),
        HiddenLayer(nhidden[-1], 10, T.nnet.softmax)])

    x = T.matrix()
    y = T.ivector()
    lr = T.scalar()
    y_hat, = model(x)

    loss = T.nnet.categorical_crossentropy(y_hat, y)
    cost = T.sum(loss)
    l2 = lambda ps: sum([T.sum(p ** 2) for p in ps])

    updates = []
    # Summed misclassifications over the minibatch.
    error = T.sum(T.neq(y_hat.argmax(axis=1), y))
    nn_regularization = L2reg * l2(params)
    grads = T.grad(cost + nn_regularization, params)
    updates += gradient_descent(params, grads, lr)

    learn = theano.function([x, y, lr], [cost, error], updates=updates,
                            allow_input_downcast=True)
    test = theano.function([x, y], [cost, error],
                           allow_input_downcast=True)
    return model, learn, test
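# --- Usage sketch (our addition; the repo's actual training script is
# not shown). `train_x` is assumed to be an (N, 3072) float array and
# `train_y` an (N,) int label array, CIFAR-10-style. Since `learn`
# returns summed loss and summed errors per minibatch, dividing the
# running totals by N yields the mean loss and the error rate.
def run_epoch(learn, train_x, train_y, batch_size=128, lr=0.01):
    total_cost = 0.0
    total_err = 0.0
    for i in range(0, len(train_x), batch_size):
        c, e = learn(train_x[i:i + batch_size],
                     train_y[i:i + batch_size], lr)
        total_cost += c
        total_err += e
    return total_cost / len(train_x), float(total_err) / len(train_x)

# model, learn, test = build_model()
# print run_epoch(learn, train_x, train_y)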