def param_init_action_response_layer(options, params, constraints, prefix='ar',
                                     nin=0, rng=None, unif_range=0.2,
                                     level=0, **kwargs):
    '''
    Initialize the parameters and simplex constraints for an action response layer.
    '''
    rng = init_rng(rng)
    n_features = options['hidden_units'][-1]

    if options['shared_ld']:
        params, constraints = init_level_dist(params, unif_range, nin, rng, constraints)
    else:
        if level > 0:
            # Per-layer distribution over the lower levels, constrained to the simplex.
            params[_p(prefix, 'ld')] = floatx(rng.uniform(size=(level,),
                                                          low=0.1,
                                                          high=0.9))
            params[_p(prefix, 'ld')] /= params[_p(prefix, 'ld')].sum()
            constraints['simplex'] = constraints.get('simplex', []) + [_p(prefix, 'ld')]

    initial_Wf = rng.uniform(size=(n_features,), low=0., high=unif_range)
    initial_Wf /= initial_Wf.sum()

    if level == 0:
        params[_p(prefix, 'Wf')] = floatx(initial_Wf)
        constraints['simplex'] = constraints.get('simplex', []) + [_p(prefix, 'Wf')]

    if level > 0:
        params[_p(prefix, 'W_h')] = floatx(rng.uniform(size=(1 + n_features,),
                                                       low=-0.01,
                                                       high=0.01))
        params[_p(prefix, 'lam')] = floatx(1.0)
    return params, constraints
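# Illustrative sketch (not used by the model): initialising a hypothetical
# 3-level stack of action response layers and checking that every parameter
# registered under the 'simplex' constraint is a valid distribution. The option
# values and prefixes here are assumptions made up for the example.
def _demo_param_init_action_response():
    demo_options = {'hidden_units': [16, 8], 'shared_ld': False}
    params, constraints = {}, {}
    for lvl in range(3):
        params, constraints = param_init_action_response_layer(
            demo_options, params, constraints, prefix='ar%d' % lvl, level=lvl)
    for name in constraints.get('simplex', []):
        assert abs(params[name].sum() - 1.0) < 1e-5
    return params, constraints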
def action_response_layer(tparams, features, options, payoff=None,
                          prefix='ar', opposition=None, level=0, **kwargs):
    """
    action_response_layer:  tensor3, (tensor3) -> matrix
                            features, (opposition) -> ar_layer

    Tensor dims:
    features: iter, action_payoff, feature
    opposition: iter, level, prob of action
    output: iter, prob of action

    Probability of an action given features and beliefs about opposition.
    """
    if level == 0:
        # Level-0 players respond to the game features alone, with no beliefs
        # about the opposition.
        w_feat = tparams[_p(prefix, 'Wf')]
        weighted_features = tensor.sum(features * w_feat.dimshuffle('x', 0, 'x'), axis=1)
        ar = weighted_features
        return ar, weighted_features, None
    else:
        weighted_features = None
        lam = tparams[_p(prefix, 'lam')]
        if options['shared_ld']:
            # Shared level distribution: use the entries for the levels below this one.
            ld = tparams['ld'][0:level]
        else:
            ld = tparams[_p(prefix, 'ld')]
        ld += floatx(1e-32)  # avoid divide by zero
        ld /= ld.sum()

        # U * AR * ld: mix the per-level opposition action distributions with ld
        # to get the expected opponent play.
        weighting = opposition * ld.dimshuffle('x', 0, 'x')
        prob_a = tensor.sum(weighting, axis=1)

        # Collapse the feature axis of the payoff tensor with the learned weights W_h.
        payoff = payoff * tparams[_p(prefix, 'W_h')].dimshuffle('x', 0, 'x', 'x')
        payoff = tensor.sum(payoff, axis=1)

        # Expected payoff of each action against prob_a.
        br = tensor.sum(payoff * prob_a.dimshuffle(0, 'x', 1), axis=2)
        out = br
        # remove weighted_features, br when done with visualisation
        return tensor.nnet.softmax(out * lam), weighted_features, br
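# Numerical sketch (numpy only, shapes assumed) of the level > 0 computation
# above, with the W_h feature weighting of the payoff tensor already collapsed
# out: mix the opposition's per-level action distributions with the level
# distribution ld, take expected payoffs, then a softmax sharpened by lam.
def _demo_action_response_numpy():
    import numpy
    n_iter, n_levels, n_actions = 4, 2, 3
    rng = numpy.random.RandomState(0)
    # opposition[i, l, a]: probability that a level-l opponent plays action a
    opposition = rng.dirichlet(numpy.ones(n_actions), size=(n_iter, n_levels))
    ld = numpy.array([0.7, 0.3])          # belief over opponent levels
    # payoff[i, a, b]: payoff of playing action a when the opponent plays b
    payoff = rng.uniform(size=(n_iter, n_actions, n_actions))
    lam = 1.0
    prob_a = (opposition * ld[None, :, None]).sum(axis=1)   # expected opponent play
    br = (payoff * prob_a[:, None, :]).sum(axis=2)          # expected payoff per action
    ex = numpy.exp(lam * br)
    return ex / ex.sum(axis=1, keepdims=True)               # softmax response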
def param_init_hid_layer(options, params, prefix='hidden',
                         nin=None, nout=None, rng=None, init=xavier_weight, b_offset=0.):
    if nin is None:
        nin = options['n_players']
    params[_p(prefix, 'W')] = init(nin, nout, rng=rng)
    params[_p(prefix, 'b')] = floatx(b_offset + numpy.zeros((nout,)))
    return params
def param_init_output(options, params, constraints, rng, nin, prefix=''):
    if options['shared_ld']:
        return params, constraints
    else:
        params['ld'] = floatx(rng.uniform(size=(nin,),
                                          low=0.1,
                                          high=0.9))
        # constrain level distribution to be on the simplex
        params['ld'] /= params['ld'].sum()
        constraints['simplex'] = constraints.get('simplex', []) + ['ld']
        return params, constraints
def init_level_dist(params, upper_bound, nin, rng, constraints):
    '''
    Initialize a shared level distribution for the action response layers.
    '''
    if 'ld' not in params:
        params['ld'] = floatx(rng.uniform(size=(nin,),
                                          low=0.1,
                                          high=0.9))
        # constrain level distribution to be on the simplex
        params['ld'] /= params['ld'].sum()
        constraints['simplex'] = constraints.get('simplex', []) + ['ld']
    return params, constraints
def sgd(lr, tparams, grads, inp, cost, use_noise, **kwargs):
    '''
    Stochastic gradient descent with gradient accumulation: f_grad_shared adds
    each minibatch's gradients into gshared, and f_update applies the
    accumulated gradients scaled by lr.
    '''
    print 'Using SGD'
    gshared = [theano.shared(p.get_value() * floatx(0.), name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, gs + g) for gs, g in zip(gshared, grads)]

    f_grad_shared = theano.function(inp, cost, givens={use_noise: numpy.float32(1.)},
                                    on_unused_input='warn', updates=gsup, allow_input_downcast=True)

    pup = [(p, p - lr * g)
           for p, g in zip(itemlist(tparams), gshared)]
    f_update = theano.function([lr], [], updates=pup, allow_input_downcast=True)

    return f_grad_shared, f_update, gshared
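# Usage sketch: the optimisers in this file split a step in two. f_grad_shared
# accumulates gradients into gshared on every call; f_update applies the
# accumulated gradients once. A hypothetical loop looks like:
#
#     for g in gshared:
#         g.set_value(0. * g.get_value())   # reset the accumulators
#     for X, y in minibatches:
#         cost = f_grad_shared(X, y)        # accumulate gradients, return cost
#     f_update(0.01)                        # apply one step with lr = 0.01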
def list_update(data,
                model,
                batchsize=None,
                f_update=None,
                return_n_obs=False,
                rng=numpy.random.RandomState(None)):
    '''
    Accumulate `model` over a dict of datasets keyed by game size (rows, cols),
    where each value is an (X, y) pair: X reshapes to (n, 2, rows, cols) payoff
    matrices and y holds integer action counts.
    '''
    if batchsize is not None:
        data = sample_minibatch(data, batchsize, rng)
    loss = 0.0
    y_tot = 0
    for i in data:
        n = data[i][1].shape[0]
        X = floatx(data[i][0].reshape(n, 2, i[0], i[1]))
        y = numpy.ndarray.astype(data[i][1], 'int32')
        y_tot += y.sum()
        loss += model(X, y)
        if f_update is not None:
            f_update(0.01)
    if return_n_obs:
        return loss, y_tot
    else:
        return loss
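# Illustrative sketch of the data layout list_update expects: a dict keyed by
# game size (rows, cols), each value an (X, y) pair. The sizes here are
# assumptions made up for the example.
def _demo_data_layout():
    import numpy
    rng = numpy.random.RandomState(0)
    n_games, rows, cols = 5, 3, 3
    # Both players' payoff matrices, flattened so X reshapes to (n, 2, rows, cols).
    X = rng.uniform(size=(n_games, 2 * rows * cols))
    # Observed counts of how often each row action was played.
    y = rng.randint(0, 10, size=(n_games, rows)).astype('int32')
    return {(rows, cols): (X, y)}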
def train(options, data, load_params=False, start_epoc=0):
    print "OPTIONS: ", options
    print 'Setting up model with options:'
    options = set_defaults(options)
    for kk, vv in options.iteritems():
        print kk, vv
    print "model seed: ", options['model_seed']
    print "fold: ", options['fold']
    print 'seed: ', options['seed']
    rng = numpy.random.RandomState(options['model_seed'] +
                                   100 * options.get('fold', 99) +
                                   options.get('seed', 99))
    params, operators = init_params(options, rng)
    print 'done...'

    if load_params:
        loaded = load_par(options)
        start_epoc = resume_epoc(options)
        # Check that we've loaded the correct parameters...
        for kk, vv in loaded.iteritems():
            assert params[kk].shape == vv.shape
            assert type(params[kk]) == type(vv)
        params = loaded

    tparams = init_tparams(params)

    trng, use_noise, inps, out = build_model(tparams, options, rng)
    y = tensor.imatrix('y')
    cost = nll(out, y)

    f_eval = theano.function([inps, y],
                             cost,
                             givens={use_noise: numpy.float32(0.)},
                             on_unused_input='ignore')

    reg = 0.
    for k, v in tparams.iteritems():
        if k[:6] == 'hidden' or k[-3:] == 'W_h':
            reg += options['l1'] * tensor.sum(abs(v))
            reg += options['l2'] * tensor.sum((v)**2)

    cost += reg

    grads = tensor.grad(cost, wrt=itemlist(tparams))
    lr = tensor.scalar(name='lr', dtype=theano.config.floatX)
    opt = get_optim(options['opt'])
    print 'Compiling functions'
    f_grad_shared, f_update, gshared = opt(lr, tparams, grads, [inps, y], cost,
                                           use_noise)
    f_out = theano.function([inps],
                            out,
                            givens={use_noise: numpy.float32(0.)},
                            on_unused_input='ignore',
                            allow_input_downcast=True)

    best = numpy.inf
    print 'Starting training'

    train = list_update(data[0], f_eval, options['batch_size'], rng=rng)
    test = list_update(data[-1], f_eval, options['batch_size'], rng=rng)
    starting = (train, test)
    print 'Pre-training. test: %f, train: %f' % (test, train)
    print 'Training'
    lr = options['lr']
    max_itr = options['max_itr']
    grad_norm = 0.
    train_scores = 50 * [0.]
    try:
        for epoch in xrange(max_itr):
            start_time = time.time()
            for g in gshared:
                # manually set gradients to 0 because we accumulate in list update
                g.set_value(0.0 * g.get_value())
            use_noise.set_value(1.)
            train_cost, n_obs = list_update(data[0],
                                            f_grad_shared,
                                            batchsize=options['batch_size'],
                                            rng=rng,
                                            return_n_obs=True)
            use_noise.set_value(0.)
            for g in gshared:
                g.set_value(floatx(g.get_value() / float(n_obs)))
            f_update(lr)
            apply_proximity(tparams, operators)
            train = list_update(data[0],
                                f_eval,
                                options['batch_size'],
                                rng=rng)
            elapsed_time = time.time() - start_time

            # Evaluate the full test set once; keep the best model by training loss.
            test = list_update(data[-1], f_eval)

            if train < best:
                # early stopping on training set
                best_par = unzip(tparams)
                best_perf = (train, test)
                best = train

            if (epoch % 50) == 0:
                # Save progress....
                save_progress(options, tparams, epoch, best_perf)
                print 'Epoch: %d, cost: %f, train: %f, test: %f, lr:%f, time: %f' % (
                    epoch, train_cost, train, test, lr, elapsed_time)

            # Check if we're diverging...
            train_ave = running_ave(train_scores, train, epoch)

            if epoch > 1000:
                # Only exit if we're diverging after 1000 iterations
                if train_ave > 1.03 * best_perf[0]:
                    print "Diverged..."
                    break
    except KeyboardInterrupt:
        print "Interrupted"
    # check that we're outputting probability distributions
    X = data[0][(3, 3)][0]
    assert abs(
        f_out(X.reshape(X.shape[0], 2, 3, 3)).sum() - float(X.shape[0])) < 1e-4
    print "Best performance:"
    print "train, test"
    print "%f,%f" % best_perf
    return best_perf, best_par
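# Usage sketch (option values are assumptions): data is a (train, test) pair of
# the per-game-size dicts sketched in _demo_data_layout, and train() returns the
# best (train_loss, test_loss) pair together with the corresponding parameters.
#
#     best_perf, best_par = train(options, (train_data, test_data))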
def adam(lr, tparams, grads, inp, cost, use_noise, **kwargs):
    '''
    See: Adam - A Method for Stochastic Optimization. https://arxiv.org/abs/1412.6980

    Note that the lr argument is ignored here: the step size is the hard-coded
    lr0 below, and Adam adapts per-parameter learning rates from there. To get a
    global learning rate knob back, change the parameter update line
        p_t = p - (lr_t * g_t)
    to
        p_t = p - lr * (lr_t * g_t)
    '''
    gshared = [theano.shared(p.get_value() * floatx(0.), name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, gs + g) for gs, g in zip(gshared, grads)]

    f_grad_shared = theano.function(inp, cost, updates=gsup, allow_input_downcast=True)

    lr0 = floatx(0.0002)
    # Note: b1 and b2 are one minus the paper's beta1/beta2 (beta1 = 0.9, beta2 = 0.999).
    b1 = floatx(0.1)
    b2 = floatx(0.001)
    e = floatx(1e-8)

    updates = []

    i = theano.shared(floatx(0.))
    i_t = i + floatx(1.)
    # Bias-correction terms from the paper; since b1 = 1 - beta1, use (1 - b1)**t.
    fix1 = floatx(1.) - (floatx(1.) - b1)**(i_t)
    fix2 = floatx(1.) - (floatx(1.) - b2)**(i_t)
    lr_t = lr0 * (tensor.sqrt(fix2) / fix1)

    for p, g in zip(tparams.values(), gshared):
        m = theano.shared(p.get_value() * floatx(0.))
        v = theano.shared(p.get_value() * floatx(0.))
        m_t = (b1 * g) + ((floatx(1.) - b1) * m)
        v_t = (b2 * tensor.sqr(g)) + ((floatx(1.) - b2) * v)
        g_t = m_t / (tensor.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))

    f_update = theano.function([lr], [], updates=updates,
                               on_unused_input='ignore', allow_input_downcast=True)

    return f_grad_shared, f_update, gshared
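# Numerical sketch (numpy only) of a single update as performed above, keeping
# the convention that b1 and b2 are one minus the paper's beta1/beta2.
def _demo_adam_step(p, g, m, v, t, lr0=0.0002, b1=0.1, b2=0.001, e=1e-8):
    import numpy
    m_t = b1 * g + (1. - b1) * m                   # first-moment estimate
    v_t = b2 * g ** 2 + (1. - b2) * v              # second-moment estimate
    fix1 = 1. - (1. - b1) ** t                     # bias corrections
    fix2 = 1. - (1. - b2) ** t
    lr_t = lr0 * numpy.sqrt(fix2) / fix1
    p_t = p - lr_t * m_t / (numpy.sqrt(v_t) + e)
    return p_t, m_t, v_t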