def __init__(self, x, y, mask, n_inputs, n_outputs, s_hiddens, s_act, t_hiddens, t_act):
    """
    Constructs the backward computation graph, i.e. the transformation from output y back to random numbers u.
    :param x: theano array, the conditional input
    :param y: theano array, the output
    :param mask: theano array, a mask indicating which outputs are unchanged
    :param n_inputs: int, number of conditional inputs
    :param n_outputs: int, number of outputs
    :param s_hiddens: list of hidden widths for the scale net
    :param s_act: string, activation function for the scale net
    :param t_hiddens: list of hidden widths for the translate net
    :param t_act: string, activation function for the translate net
    """

    # save input arguments
    self.mask = mask
    self.n_inputs = n_inputs
    self.n_outputs = n_outputs
    self.s_hiddens = s_hiddens
    self.s_act = s_act
    self.t_hiddens = t_hiddens
    self.t_act = t_act

    # masked output
    my = mask * y

    # scale function
    self.s_net = nn.FeedforwardNet(n_inputs + n_outputs, tt.concatenate([x, my], axis=1))
    for h in s_hiddens:
        self.s_net.addLayer(h, s_act)
    self.s_net.addLayer(n_outputs, 'linear')

    # translate function
    self.t_net = nn.FeedforwardNet(n_inputs + n_outputs, tt.concatenate([x, my], axis=1))
    for h in t_hiddens:
        self.t_net.addLayer(h, t_act)
    self.t_net.addLayer(n_outputs, 'linear')

    # output
    s = self.s_net.output
    t = self.t_net.output
    self.u = my + (1.0 - mask) * tt.exp(-s) * (y - t)

    # log det du/dy
    self.logdet_dudx = -tt.sum((1.0 - mask) * s, axis=1)

    # parameters
    self.parms = self.s_net.parms + self.t_net.parms

    # theano evaluation function, will be compiled when first needed
    self.eval_forward_f = None
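# A minimal NumPy sketch (illustrative only, not part of the module) of the affine
# coupling transform built above: s_fun and t_fun stand in for the scale and translate
# nets, and the mask, shapes and data are assumptions. It checks that the backward map
# computed in this constructor inverts the forward map used in eval_forward, and shows
# the corresponding log-det term.
import numpy as np

rng = np.random.RandomState(0)
n, d = 5, 4
mask = np.array([1.0, 1.0, 0.0, 0.0])         # dims with mask == 1 pass through unchanged

def s_fun(masked):                             # stands in for the scale net
    return np.tanh(masked.sum(axis=1, keepdims=True)) * np.ones((1, d))

def t_fun(masked):                             # stands in for the translate net
    return 0.5 * masked.sum(axis=1, keepdims=True) * np.ones((1, d))

# forward: random numbers u -> output y (as in eval_forward)
u = rng.randn(n, d)
mu = mask * u
y = mu + (1.0 - mask) * (u * np.exp(s_fun(mu)) + t_fun(mu))

# backward: output y -> random numbers u (as in this constructor)
my = mask * y                                  # equals mu, so the nets see the same input
s, t = s_fun(my), t_fun(my)
u_rec = my + (1.0 - mask) * np.exp(-s) * (y - t)
logdet_dudy = -np.sum((1.0 - mask) * s, axis=1)

assert np.allclose(u, u_rec)                   # the two directions are exact inverses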
def two_sample_test_classifier(x0, x1, rng=np.random):
    """
    Classifier-based two-sample test. Given two datasets, trains a binary classifier
    to discriminate between them and reports its accuracy.
    :param x0: first dataset
    :param x1: second dataset
    :param rng: random number generator to use
    :return: discrimination accuracy
    """

    import ml.models.neural_nets as nn
    import ml.trainers as trainers
    import ml.loss_functions as lf

    # create dataset
    x0 = np.asarray(x0)
    x1 = np.asarray(x1)
    n_x0, n_dims = x0.shape
    n_x1 = x1.shape[0]
    n_data = n_x0 + n_x1
    assert n_dims == x1.shape[1], 'inconsistent sizes'
    xs = np.vstack([x0, x1])
    ys = np.hstack([np.zeros(n_x0), np.ones(n_x1)])

    # shuffle, then split into training / validation sets
    # (without shuffling, the validation split would contain samples from x0 only)
    idx = rng.permutation(n_data)
    xs, ys = xs[idx], ys[idx]
    n_val = int(n_data * 0.1)
    xs_val, ys_val = xs[:n_val], ys[:n_val]
    xs_trn, ys_trn = xs[n_val:], ys[n_val:]

    # create classifier
    classifier = nn.FeedforwardNet(n_dims)
    classifier.addLayer(n_dims * 10, 'relu', rng=rng)
    classifier.addLayer(n_dims * 10, 'relu', rng=rng)
    classifier.addLayer(1, 'logistic', rng=rng)

    # train classifier
    trn_target, trn_loss = lf.CrossEntropy(classifier.output)
    val_target, val_loss = lf.CrossEntropy(classifier.output)
    trainer = trainers.SGD(
        model=classifier,
        trn_data=[xs_trn, ys_trn],
        trn_loss=trn_loss,
        trn_target=trn_target,
        val_data=[xs_val, ys_val],
        val_loss=val_loss,
        val_target=val_target
    )
    trainer.train(
        minibatch=100,
        patience=20,
        monitor_every=1,
        logger=None
    )

    # measure accuracy
    pred = classifier.eval(xs)[:, 0] > 0.5
    acc = np.mean(pred == ys)

    return acc
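# Hypothetical usage sketch (the toy datasets below are assumptions, not part of the
# repo): an accuracy close to 0.5 means the classifier cannot tell the two sample sets
# apart, while an accuracy close to 1.0 indicates a clear mismatch.
rng = np.random.RandomState(42)
samples_a = rng.randn(1000, 3)                 # e.g. samples drawn from a trained model
samples_b = rng.randn(1000, 3) + 0.1           # e.g. samples from the target distribution
acc = two_sample_test_classifier(samples_a, samples_b, rng=rng)
print('discrimination accuracy: {0:.3f}'.format(acc))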
def eval_forward(self, x, u):
    """
    Evaluates the layer forward, i.e. from input x and random numbers u to output y.
    :param x: numpy array
    :param u: numpy array
    :return: numpy array
    """

    if self.eval_forward_f is None:

        # conditional input
        tt_x = tt.matrix('x')

        # masked random numbers
        tt_u = tt.matrix('u')
        mu = self.mask * tt_u

        # scale net
        s_net = nn.FeedforwardNet(self.n_inputs + self.n_outputs, tt.concatenate([tt_x, mu], axis=1))
        for h in self.s_hiddens:
            s_net.addLayer(h, self.s_act)
        s_net.addLayer(self.n_outputs, 'linear')
        util.copy_model_parms(self.s_net, s_net)
        s = s_net.output

        # translate net
        t_net = nn.FeedforwardNet(self.n_inputs + self.n_outputs, tt.concatenate([tt_x, mu], axis=1))
        for h in self.t_hiddens:
            t_net.addLayer(h, self.t_act)
        t_net.addLayer(self.n_outputs, 'linear')
        util.copy_model_parms(self.t_net, t_net)
        t = t_net.output

        # transform (x, u) -> y
        y = mu + (1.0 - self.mask) * (tt_u * tt.exp(s) + t)

        # compile theano function
        self.eval_forward_f = theano.function(inputs=[tt_x, tt_u], outputs=y)

    return self.eval_forward_f(x.astype(dtype), u.astype(dtype))
def create_net(rng=np.random):
    """
    Creates a network with logistic output.
    """

    n_inputs = sim.Prior().n_dims

    net = nn.FeedforwardNet(n_inputs)
    net.addLayer(100, 'relu', rng=rng)
    net.addLayer(100, 'relu', rng=rng)
    net.addLayer(1, 'logistic', rng=rng)

    return net
def eval_forward(self, u):
    """
    Evaluates the layer forward, i.e. from random numbers u to output x.
    :param u: numpy array
    :return: numpy array
    """

    if self.eval_forward_f is None:

        # masked random numbers
        tt_u = tt.matrix('u')
        mu = self.mask * tt_u

        # scale net
        s_net = nn.FeedforwardNet(self.n_inputs, mu)
        for h in self.s_hiddens:
            s_net.addLayer(h, self.s_act)
        s_net.addLayer(self.n_inputs, 'linear')
        util.copy_model_parms(self.s_net, s_net)
        s = s_net.output

        # translate net
        t_net = nn.FeedforwardNet(self.n_inputs, mu)
        for h in self.t_hiddens:
            t_net.addLayer(h, self.t_act)
        t_net.addLayer(self.n_inputs, 'linear')
        util.copy_model_parms(self.t_net, t_net)
        t = t_net.output

        # transform u -> x
        x = mu + (1.0 - self.mask) * (tt_u * tt.exp(s) + t)

        # compile theano function
        self.eval_forward_f = theano.function(inputs=[tt_u], outputs=x)

    return self.eval_forward_f(u.astype(dtype))
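# Illustrative sketch (the helper below is hypothetical, not from this module): a single
# coupling layer leaves the masked dimensions untouched, so layers are typically stacked
# with alternating masks so that every dimension gets transformed somewhere in the stack.
import numpy as np

def make_alternating_masks(n_dims, n_layers):
    base = (np.arange(n_dims) % 2).astype(float)        # e.g. [0, 1, 0, 1, ...]
    return [base if i % 2 == 0 else 1.0 - base for i in range(n_layers)]

masks = make_alternating_masks(n_dims=4, n_layers=3)
# -> [array([0., 1., 0., 1.]), array([1., 0., 1., 0.]), array([0., 1., 0., 1.])]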
def __init__(self, n_inputs, n_hiddens, act_fun, n_outputs, n_components, rng=np.random):
    """
    Constructs an mdn with a given architecture. Note that the mdn has full precision matrices.
    :param n_inputs: dimensionality of the input
    :param n_hiddens: list with number of hidden units in the net
    :param act_fun: activation function type to use in the net
    :param n_outputs: dimensionality of the output
    :param n_components: number of mixture components
    :return: None
    """

    # check if inputs are of the right type
    assert util.math.isposint(n_inputs), 'Number of inputs must be a positive integer.'
    assert util.math.isposint(n_outputs), 'Number of outputs must be a positive integer.'
    assert util.math.isposint(n_components), 'Number of components must be a positive integer.'
    assert isinstance(n_hiddens, list), 'Number of hidden units must be a list of positive integers.'
    for h in n_hiddens:
        assert util.math.isposint(h), 'Number of hidden units must be a list of positive integers.'
    assert act_fun in ['logistic', 'tanh', 'linear', 'relu', 'softplus'], 'Unsupported activation function.'

    # construct the net
    self.net = nn.FeedforwardNet(n_inputs)
    for h in n_hiddens:
        self.net.addLayer(h, act_fun, rng)
    self.input = self.net.input

    # mixing coefficients
    self.Wa = theano.shared(
        (rng.randn(self.net.n_outputs, n_components) / np.sqrt(self.net.n_outputs + 1)).astype(dtype),
        name='Wa', borrow=True)
    self.ba = theano.shared(rng.randn(n_components).astype(dtype), name='ba', borrow=True)
    self.a = tt.nnet.softmax(tt.dot(self.net.hs[-1], self.Wa) + self.ba)

    # mixture means
    # the mean of each component is calculated separately; consider vectorizing this
    self.Wms = [
        theano.shared(
            (rng.randn(self.net.n_outputs, n_outputs) / np.sqrt(self.net.n_outputs + 1)).astype(dtype),
            name='Wm' + str(i), borrow=True)
        for i in xrange(n_components)
    ]
    self.bms = [
        theano.shared(rng.randn(n_outputs).astype(dtype), name='bm' + str(i), borrow=True)
        for i in xrange(n_components)
    ]
    self.ms = [tt.dot(self.net.hs[-1], Wm) + bm for Wm, bm in izip(self.Wms, self.bms)]

    # mixture precisions
    # note that U here is an upper triangular matrix such that U'*U is the precision
    self.WUs = [
        theano.shared(
            (rng.randn(self.net.n_outputs, n_outputs**2) / np.sqrt(self.net.n_outputs + 1)).astype(dtype),
            name='WU' + str(i), borrow=True)
        for i in xrange(n_components)
    ]
    self.bUs = [
        theano.shared(rng.randn(n_outputs**2).astype(dtype), name='bU' + str(i), borrow=True)
        for i in xrange(n_components)
    ]
    aUs = [
        tt.reshape(tt.dot(self.net.hs[-1], WU) + bU, [-1, n_outputs, n_outputs])
        for WU, bU in izip(self.WUs, self.bUs)
    ]
    triu_mask = np.triu(np.ones([n_outputs, n_outputs], dtype=dtype), 1)
    diag_mask = np.eye(n_outputs, dtype=dtype)
    self.Us = [triu_mask * aU + diag_mask * tt.exp(diag_mask * aU) for aU in aUs]
    ldetUs = [tt.sum(tt.sum(diag_mask * aU, axis=2), axis=1) for aU in aUs]

    # log probabilities
    self.y = tt.matrix('y')
    L_comps = [
        -0.5 * tt.sum(tt.sum((self.y - m).dimshuffle([0, 'x', 1]) * U, axis=2)**2, axis=1) + ldetU
        for m, U, ldetU in izip(self.ms, self.Us, ldetUs)
    ]
    self.L = tt.log(tt.sum(tt.exp(tt.stack(L_comps, axis=1) + tt.log(self.a)), axis=1)) \
        - (0.5 * n_outputs * np.log(2 * np.pi))
    self.trn_loss = -tt.mean(self.L)

    # all parameters in one container
    self.parms = self.net.parms + [self.Wa, self.ba] + self.Wms + self.bms + self.WUs + self.bUs

    # save these for later
    self.n_inputs = self.net.n_inputs
    self.n_outputs = n_outputs
    self.n_components = n_components
    self.act_fun = act_fun

    # theano evaluation functions, will be compiled when first needed
    self.eval_comps_f = None
    self.eval_lprobs_f = None
    self.eval_grad_f = None
    self.eval_score_f = None
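# A minimal NumPy/SciPy sketch (assumed example, not part of the class) of the
# per-component log density computed above: with U upper triangular, positive on the
# diagonal, and U'U the precision matrix, the Gaussian log density decomposes into a
# quadratic term, the log determinant of U, and the usual normalising constant.
import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.RandomState(0)
d = 3
m = rng.randn(d)                                   # component mean
A = rng.randn(d, d)
U = np.triu(A, 1) + np.diag(np.exp(np.diag(A)))    # upper triangular, positive diagonal
P = U.T.dot(U)                                     # precision matrix
y = rng.randn(d)

logp = (-0.5 * np.sum(U.dot(y - m)**2)             # -0.5 * (y - m)' U'U (y - m)
        + np.sum(np.log(np.diag(U)))               # log|det U| = 0.5 * log det P
        - 0.5 * d * np.log(2 * np.pi))

# agrees with a reference implementation (assuming scipy is available)
assert np.isclose(logp, multivariate_normal.logpdf(y, m, np.linalg.inv(P)))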