def param_init_action_response_layer(options, params, constraints, prefix='ar',
                                     nin=0, rng=None, unif_range=0.2, level=0,
                                     **kwargs):
    '''
    Create the initial parameters of one action response layer.

    options:     model options; reads options['hidden_units'][-1] and
                 options['shared_ld'].
    params:      dict of parameters, extended in place and returned.
    constraints: dict of constraints; names of parameters that must stay on
                 the simplex are appended to constraints['simplex'].
    prefix:      name prefix handed to _p() to build the parameter keys.
    nin:         length of the shared level distribution (only forwarded to
                 init_level_dist when options['shared_ld'] is set).
    rng:         random source, passed through init_rng() before use.
    unif_range:  upper bound of the uniform draw for the feature weights;
                 also forwarded to init_level_dist.
    level:       level of this layer. Level 0 gets feature weights ('Wf');
                 levels above 0 get 'W_h' and 'lam' (and their own 'ld' when
                 the level distribution is not shared).

    Returns the (params, constraints) pair.
    '''
    rng = init_rng(rng)
    n_features = options['hidden_units'][-1]

    if options['shared_ld']:
        params, constraints = init_level_dist(params, unif_range, nin, rng,
                                              constraints)
    elif level > 0:
        # Per-layer distribution over the lower levels, normalised to sum to 1.
        ld_key = _p(prefix, 'ld')
        level_dist = floatx(rng.uniform(size=(level), low=0.1, high=0.9))
        level_dist /= level_dist.sum()
        params[ld_key] = level_dist
        constraints['simplex'] = constraints.get('simplex', []) + [ld_key]

    # The feature weights are drawn for every level, even though only level 0
    # stores them, so the random stream is consumed the same way at all levels.
    initial_Wf = numpy.zeros(n_features)
    initial_Wf += floatx(rng.uniform(size=(n_features), low=0.,
                                     high=unif_range))
    initial_Wf /= initial_Wf.sum()

    if level == 0:
        wf_key = _p(prefix, 'Wf')
        params[wf_key] = floatx(initial_Wf)
        constraints['simplex'] = constraints.get('simplex', []) + [wf_key]
    elif level > 0:
        # Small symmetric initialisation: one weight per hidden unit plus one.
        params[_p(prefix, 'W_h')] = floatx(
            rng.uniform(size=(1 + n_features), low=-0.01, high=0.01))
        params[_p(prefix, 'lam')] = floatx(1.0)

    return params, constraints
def action_response_layer(tparams, features, options, payoff=None,
                          prefix='ar', opposition=None, level=0, **kwargs):
    """
    action_response_layer: tensor3, (tensor3) -> matrix
    features, (opposition) -> ar_layer

    Build the symbolic output of one action response layer.

    Level 0: the features are multiplied by the 'Wf' weights (broadcast along
    axis 1) and summed over axis 1. The result is returned twice, followed by
    None.

    Level > 0: the opposition's outputs are mixed over axis 1 using the
    normalised level distribution, the payoffs are mixed over axis 1 using
    'W_h', and the expected payoff `br` is obtained by weighting the mixed
    payoffs with the opposition mixture and summing over axis 2. The layer
    output is softmax(br * lam).

    Tensor dims as stated by the original author (not verifiable from this
    function alone):
        features:   iter, action_payoff, feature
        opposition: iter, level, prob of action
        output:     iter, prob of action

    Returns a 3-tuple (output, weighted_features, br); weighted_features is
    None above level 0 and br is None at level 0.
    """
    # The values are unused; the unpack is kept because it requires the shape
    # to have exactly three entries.
    n, f, i = features.shape

    if level == 0:
        feature_weights = tparams[_p(prefix, 'Wf')].dimshuffle('x', 0, 'x')
        weighted_features = tensor.sum(features * feature_weights, axis=1)
        return weighted_features, weighted_features, None

    weighted_features = None
    lam = tparams[_p(prefix, 'lam')]
    eps = floatx(1e-32)  # avoid divide by zero

    if options['shared_ld']:
        # Shared distribution: keep only the entries for the levels below.
        ld = tparams['ld'] + eps
        ld = ld[0:level]
    else:
        ld = tparams[_p(prefix, 'ld')] + eps
    ld = ld / ld.sum()

    # U * AR * ld (where * is matrix product)
    prob_a = tensor.sum(opposition * ld.dimshuffle('x', 0, 'x'), axis=1)

    payoff_weights = tparams[_p(prefix, 'W_h')].dimshuffle('x', 0, 'x', 'x')
    payoff = tensor.sum(payoff * payoff_weights, axis=1)

    br = tensor.sum(payoff * prob_a.dimshuffle(0, 'x', 1), axis=2)

    # remove weighted_features, br when done with visualisation
    return tensor.nnet.softmax(br * lam), weighted_features, br
def param_init_hid_layer(options, params, prefix='hidden', nin=None,
                         nout=None, rng=None, init=xavier_weight,
                         b_offset=0.):
    '''
    Add the weight matrix and bias vector of a hidden layer to `params`.

    options:  model options; options['n_players'] is used as the input size
              when `nin` is None.
    params:   dict of parameters, extended in place and returned.
    prefix:   name prefix handed to _p() to build the 'W' and 'b' keys.
    nin:      input size, or None to fall back to options['n_players'].
    nout:     output size; also the length of the bias vector.
    rng:      random source forwarded to `init`.
    init:     callable invoked as init(nin, nout, rng=rng) to build the
              weights.
    b_offset: constant every bias entry is initialised to.

    Returns `params`.
    '''
    fan_in = options['n_players'] if nin is None else nin

    weights = init(fan_in, nout, rng=rng)
    params[_p(prefix, 'W')] = weights

    bias = numpy.zeros((nout,)) + b_offset
    params[_p(prefix, 'b')] = floatx(bias)
    return params
def param_init_output(options, params, constraints, rng, nin, prefix=''):
    '''
    Initialise the output layer's level distribution 'ld'.

    When options['shared_ld'] is set nothing is created here and the inputs
    are returned untouched. Otherwise a vector of `nin` uniform draws in
    [0.1, 0.9) is normalised to sum to one, stored as params['ld'], and 'ld'
    is appended to constraints['simplex'].

    `prefix` is accepted but not used.

    Returns the (params, constraints) pair.
    '''
    if not options['shared_ld']:
        level_dist = floatx(rng.uniform(size=(nin), low=0.1, high=0.9))
        # constrain level distribution to be on the simplex
        level_dist /= level_dist.sum()
        params['ld'] = level_dist
        constraints['simplex'] = constraints.get('simplex', []) + ['ld']
    return params, constraints
def init_level_dist(params, upper_bound, nin, rng, constraints):
    '''
    Initialize a shared level distribution in the action response layers.

    The distribution is created only once: if params already holds 'ld' the
    inputs are returned unchanged. Otherwise `nin` uniform draws in
    [0.1, 0.9) are normalised to sum to one, stored as params['ld'], and 'ld'
    is appended to constraints['simplex'].

    `upper_bound` is accepted but not used.

    Returns the (params, constraints) pair.
    '''
    if 'ld' in params:
        return params, constraints

    level_dist = floatx(rng.uniform(size=(nin), low=0.1, high=0.9))
    # constrain level distribution to be on the simplex
    level_dist /= level_dist.sum()
    params['ld'] = level_dist
    constraints['simplex'] = constraints.get('simplex', []) + ['ld']
    return params, constraints
def sgd(lr, tparams, grads, inp, cost, use_noise, **kwargs):
    '''
    Build the two compiled functions for plain stochastic gradient descent.

    lr:        symbolic learning rate, the single input of f_update.
    tparams:   mapping of parameter name -> shared variable.
    grads:     gradients of `cost`, in the same order as the parameters.
    inp:       list of symbolic inputs of the cost.
    cost:      symbolic cost returned by f_grad_shared.
    use_noise: variable replaced by 1 (via `givens`) inside f_grad_shared.

    Returns (f_grad_shared, f_update, gshared):
        f_grad_shared(*inp) returns the cost and ADDS the gradients to the
            accumulators in gshared;
        f_update(lr) applies p <- p - lr * accumulator to every parameter;
        gshared is the list of gradient accumulators. Neither function
            resets them, so the caller has to.
    '''
    print('Using SGD')

    # One zero-valued accumulator per parameter, shaped like the parameter.
    gshared = []
    for name, param in tparams.iteritems():
        zeros = param.get_value() * floatx(0.)
        gshared.append(theano.shared(zeros, name='%s_grad' % name))

    accumulate = [(acc, acc + grad) for acc, grad in zip(gshared, grads)]
    f_grad_shared = theano.function(inp, cost,
                                    givens={use_noise: numpy.float32(1.)},
                                    on_unused_input='warn',
                                    updates=accumulate,
                                    allow_input_downcast=True)

    descend = [(param, param - lr * (acc))
               for param, acc in zip(itemlist(tparams), gshared)]
    f_update = theano.function([lr], [], updates=descend,
                               allow_input_downcast=True)

    return f_grad_shared, f_update, gshared
def list_update(data, model, batchsize=None, f_update=None,
                return_n_obs=False, rng=None, lr=0.01):
    '''
    Run `model` over every entry of `data` and sum the returned losses.

    data:         mapping whose keys are indexable with (at least) two
                  entries, used as the trailing dimensions when reshaping the
                  inputs, and whose values hold the inputs at index 0 and the
                  targets at index 1.
    model:        callable invoked as model(X, y); its results are summed.
    batchsize:    when not None, `data` is first replaced by
                  sample_minibatch(data, batchsize, rng).
    f_update:     optional callable invoked as f_update(lr) after each entry.
    return_n_obs: when true, also return the sum of all target values.
    rng:          random state used for minibatch sampling. Defaults to a
                  fresh numpy RandomState created only when sampling needs
                  one (previously a single instance was built when the
                  function was defined and shared by every call).
    lr:           step size handed to `f_update`; defaults to the previously
                  hard-coded 0.01.

    Returns the summed loss, or (loss, total of targets) when return_n_obs is
    set. Empty `data` yields 0.0 (and a total of 0).
    '''
    if batchsize is not None:
        if rng is None:
            rng = numpy.random.RandomState(None)
        data = sample_minibatch(data, batchsize, rng)

    loss = 0.0
    y_tot = 0
    for key in data:
        inputs, targets = data[key][0], data[key][1]
        n = targets.shape[0]
        X = floatx(inputs.reshape(n, 2, key[0], key[1]))
        y = targets.astype('int32')
        # presumably the targets are observation counts, so their sum is the
        # number of observations - verify against the caller.
        y_tot += y.sum()
        loss += model(X, y)
        if f_update is not None:
            f_update(lr)

    if return_n_obs:
        return loss, y_tot
    return loss
def train(options, data, load_params=False, start_epoc=0):
    '''
    Build the model described by `options`, train it and return the best
    scores and parameters.

    options:     model/training options; completed by set_defaults(). Reads
                 'model_seed', 'fold', 'seed', 'l1', 'l2', 'opt',
                 'batch_size', 'lr' and 'max_itr'.
    data:        sequence of datasets; data[0] is used for training and
                 data[-1] for the held-out score. Each is passed to
                 list_update().
    load_params: when true, parameters are loaded with load_par(options) and
                 checked against the freshly initialised ones.
    start_epoc:  overwritten by resume_epoc(options) when load_params is set.
                 NOTE(review): the epoch loop below always starts at 0, so
                 this value is currently never used - confirm whether
                 resuming is meant to offset the loop.

    Each epoch zeroes the gradient accumulators, accumulates gradients over
    the training data, divides them by the number of observations, takes one
    optimiser step and applies apply_proximity(). The parameters with the
    lowest training loss seen so far are kept.

    Returns (best_perf, best_par) where best_perf is (train loss, test loss).
    If no epoch improves on the starting point (for example a NaN loss,
    max_itr == 0 or an early interrupt) the pre-training scores and initial
    parameters are returned instead of failing on unbound names.
    '''
    print('%s %s' % ("OPTIONS: ", options))
    print('Setting up model with options:')
    options = set_defaults(options)
    for kk, vv in options.iteritems():
        print('%s %s' % (kk, vv))
    print('%s %s' % ("model seed: ", options['model_seed']))
    print('%s %s' % ("fold: ", options['fold']))
    print('%s %s' % ('seed: ', options['seed']))

    rng = numpy.random.RandomState(options['model_seed'] +
                                   100 * options.get('fold', 99) +
                                   options.get('seed', 99))
    params, operators = init_params(options, rng)
    print('done...')

    if load_params:
        loaded = load_par(options)
        start_epoc = resume_epoc(options)
        # Check that we've loaded the correct parameters...
        for kk, vv in loaded.iteritems():
            assert params[kk].shape == vv.shape
            assert type(params[kk]) == type(vv)
        params = loaded

    tparams = init_tparams(params)
    trng, use_noise, inps, out = build_model(tparams, options, rng)
    y = tensor.imatrix('y')
    cost = nll(out, y)

    # f_eval is compiled from the cost BEFORE the penalty is added, so the
    # reported train/test scores are the unregularised loss.
    f_eval = theano.function([inps, y], cost,
                             givens={use_noise: numpy.float32(0.)},
                             on_unused_input='ignore')

    # L1/L2 penalty on the hidden-layer parameters and the W_h weights only.
    reg = 0.
    for k, v in tparams.iteritems():
        if k[:6] == 'hidden' or k[-3:] == 'W_h':
            reg += options['l1'] * tensor.sum(abs(v))
            reg += options['l2'] * tensor.sum((v)**2)
    cost += reg

    grads = tensor.grad(cost, wrt=itemlist(tparams))
    lr = tensor.scalar(name='lr', dtype=theano.config.floatX)
    opt = get_optim(options['opt'])
    print('Compiling functions')
    f_grad_shared, f_update, gshared = opt(lr, tparams, grads,
                                           [inps, y], cost, use_noise)
    f_out = theano.function([inps], out,
                            givens={use_noise: numpy.float32(0.)},
                            on_unused_input='ignore',
                            allow_input_downcast=True)

    best = numpy.inf
    print('Starting training')
    train_loss = list_update(data[0], f_eval, options['batch_size'], rng=rng)
    test_loss = list_update(data[-1], f_eval, options['batch_size'], rng=rng)
    starting = (train_loss, test_loss)
    print('Pre-training. test: %f, train: %f' % (test_loss, train_loss))
    print('Training')

    # From here on `lr` is the numeric learning rate fed to f_update.
    lr = options['lr']
    max_itr = options['max_itr']
    train_scores = 50 * [0.]

    # Seed the "best so far" record so it is always defined, even when no
    # epoch ever beats the initial `best` of infinity.
    best_perf = starting
    best_par = unzip(tparams)

    try:
        for epoch in xrange(max_itr):
            start_time = time.time()
            for g in gshared:
                # manually set gradients to 0 because we accumulate in list
                # update
                g.set_value(0.0 * g.get_value())
            use_noise.set_value(1.)
            train_cost, n_obs = list_update(data[0], f_grad_shared,
                                            batchsize=options['batch_size'],
                                            rng=rng, return_n_obs=True)
            use_noise.set_value(0.)
            # Turn the accumulated gradient sum into a per-observation mean.
            for g in gshared:
                g.set_value(floatx(g.get_value() / float(n_obs)))
            f_update(lr)
            apply_proximity(tparams, operators)

            train_loss = list_update(data[0], f_eval, options['batch_size'],
                                     rng=rng)
            elapsed_time = time.time() - start_time

            # Evaluated once per epoch; it is used both for the best-so-far
            # record and for the progress line.
            test_loss = list_update(data[-1], f_eval)
            if train_loss < best:
                # early stopping on training set
                best_par = unzip(tparams)
                best_perf = (train_loss, test_loss)
                best = train_loss

            if (epoch % 50) == 0:
                # Save progress....
                save_progress(options, tparams, epoch, best_perf)
                print('Epoch: %d, cost: %f, train: %f, test: %f, lr:%f, time: %f' % (
                    epoch, train_cost, train_loss, test_loss, lr,
                    elapsed_time))

            # Check if we're diverging...
            train_ave = running_ave(train_scores, train_loss, epoch)
            if epoch > 1000:
                # Only exit if we're diverging after 1000 iterations
                if train_ave > 1.03 * best_perf[0]:
                    print("Diverged...")
                    break
    except KeyboardInterrupt:
        print("Interrupted")

    # check that we're outputing prob distributions
    # NOTE(review): this requires a (3, 3) entry in the training data -
    # confirm that every caller provides one.
    X = data[0][(3, 3)][0]
    assert abs(f_out(X.reshape(X.shape[0], 2, 3, 3)).sum() -
               float(X.shape[0])) < 1e-4

    print("Best performance:")
    print("train, test")
    print("%f,%f" % best_perf)
    return best_perf, best_par
def adam(lr, tparams, grads, inp, cost, use_noise, **kwargs):
    '''
    See: Adam - a method for stochastic optimization.
    https://arxiv.org/abs/1412.6980

    Note that when using Adam, the lr learning rate parameter does nothing
    because Adam chooses per-parameter learning rates. If you want to be able
    to manually turn down the learning rate, you can modify the parameter
    update line:
        p_t = p - (lr_t * g_t)
    to:
        p_t = p - lr * (lr_t * g_t)
    so that you have a global learning rate parameter.

    lr:        symbolic learning rate; accepted as the input of f_update but
               unused (see above).
    tparams:   mapping of parameter name -> shared variable.
    grads:     gradients of `cost`, in the same order as the parameters.
    inp:       list of symbolic inputs of the cost.
    cost:      symbolic cost returned by f_grad_shared.
    use_noise: accepted for interface parity with the other optimisers; not
               used here.

    Returns (f_grad_shared, f_update, gshared):
        f_grad_shared(*inp) returns the cost and ADDS the gradients to the
            accumulators in gshared;
        f_update(lr) performs one Adam step from the accumulated gradients;
        gshared is the list of gradient accumulators. Neither function
            resets them, so the caller has to.

    b1 and b2 below are (1 - beta1) and (1 - beta2) in the paper's notation:
    the moment estimates decay by (1 - b1) and (1 - b2). The bias-correction
    terms therefore use (1 - b1)**t and (1 - b2)**t; they previously used
    b1**t and b2**t, which did not match the decay actually applied.
    '''
    gshared = [theano.shared(p.get_value() * floatx(0.), name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, gs + g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inp, cost, updates=gsup,
                                    allow_input_downcast=True)

    lr0 = floatx(0.0002)
    b1 = floatx(0.1)     # weight of the new gradient in the first moment
    b2 = floatx(0.001)   # weight of the new squared gradient in the second
    e = floatx(1e-8)
    one = floatx(1.)

    updates = []

    # Step counter shared by all parameters.
    i = theano.shared(floatx(0.))
    i_t = i + one

    # Bias correction for the zero-initialised moment estimates.
    fix1 = one - (one - b1) ** i_t
    fix2 = one - (one - b2) ** i_t
    lr_t = lr0 * (tensor.sqrt(fix2) / fix1)

    for p, g in zip(tparams.values(), gshared):
        m = theano.shared(p.get_value() * floatx(0.))
        v = theano.shared(p.get_value() * floatx(0.))
        m_t = (b1 * g) + ((one - b1) * m)
        v_t = (b2 * tensor.sqr(g)) + ((one - b2) * v)
        g_t = m_t / (tensor.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))

    f_update = theano.function([lr], [], updates=updates,
                               on_unused_input='ignore',
                               allow_input_downcast=True)

    return f_grad_shared, f_update, gshared