def __init__(self, n_inputs, n_hiddens, act_fun, n_mades, batch_norm=True,
             input_order='sequential', mode='sequential', input=None, rng=np.random):
    """
    Constructor. Chains n_mades Gaussian mades, each one receiving the output u of the
    previous one (optionally passed through batch normalization), and builds the symbolic
    log likelihood and training loss of the resulting model.
    :param n_inputs: number of inputs
    :param n_hiddens: list with number of hidden units for each hidden layer
    :param act_fun: name of activation function
    :param n_mades: number of mades; must be at least 1
    :param batch_norm: whether to use batch normalization between mades
    :param input_order: order of inputs given to the first made that is built (self.mades[0]);
                        unless it is 'random', every following made gets the reverse of the
                        order used by the made before it
    :param mode: strategy for assigning degrees to hidden nodes: can be 'random' or 'sequential'
    :param input: theano variable to serve as input; if None, a new variable is created
    :param rng: random number generator handed to every made; defaults to the np.random module
    :raises ValueError: if n_mades is less than 1
    """

    # at least one made is needed: self.input_order below is read from self.mades[0],
    # so fail early with a clear message instead of an IndexError on a half-built object
    if n_mades < 1:
        raise ValueError('n_mades must be at least 1, got {0}'.format(n_mades))

    # save input arguments
    self.n_inputs = n_inputs
    self.n_hiddens = n_hiddens
    self.act_fun = act_fun
    self.n_mades = n_mades
    self.batch_norm = batch_norm
    self.mode = mode

    # symbolic input: reuse the given variable, or create a fresh matrix of the module-level dtype
    self.input = tt.matrix('x', dtype=dtype) if input is None else input
    self.parms = []

    self.mades = []
    self.bns = []
    self.u = self.input       # running output of the chain; starts as the raw input
    self.logdet_dudx = 0.0    # running sum of the log determinant terms of all layers

    # range (not xrange) so the loop works on both python 2 and python 3
    for _ in range(n_mades):

        # create a new made on top of the current output
        made = mades.GaussianMade(n_inputs, n_hiddens, act_fun, input_order, mode, self.u, rng)
        self.mades.append(made)
        self.parms += made.parms

        # keep 'random' as is; otherwise the next made uses this made's order reversed
        input_order = input_order if input_order == 'random' else made.input_order[::-1]

        # inverse autoregressive transform
        self.u = made.u
        self.logdet_dudx += 0.5 * tt.sum(made.logp, axis=1)

        # batch normalization
        if batch_norm:
            bn = layers.BatchNorm(self.u, n_inputs)
            self.u = bn.y
            self.parms += bn.parms
            # batch norm's own term of the log determinant
            self.logdet_dudx += tt.sum(bn.log_gamma) - 0.5 * tt.sum(tt.log(bn.v))
            self.bns.append(bn)

    # the model exposes the input order of the first made in the chain
    self.input_order = self.mades[0].input_order

    # log likelihoods: standard normal log density of the final u plus the accumulated log determinant
    self.L = -0.5 * n_inputs * np.log(2 * np.pi) - 0.5 * tt.sum(self.u ** 2, axis=1) + self.logdet_dudx
    self.L.name = 'L'

    # train objective: negative mean log likelihood
    self.trn_loss = -tt.mean(self.L)
    self.trn_loss.name = 'trn_loss'

    # theano evaluation functions, will be compiled when first needed
    self.eval_lprob_f = None
    self.eval_grad_f = None
    self.eval_us_f = None
def train_made(n_hiddens, act_fun, mode):
    """
    Builds a Gaussian made sized to the loaded dataset, trains it and saves it.
    :param n_hiddens: passed to mades.GaussianMade as its n_hiddens argument, and to save_model
    :param act_fun: passed to mades.GaussianMade as its act_fun argument, and to save_model
    :param mode: passed to mades.GaussianMade as its mode keyword, and to save_model
    """

    # the module-level dataset must already be available, since its n_dims sizes the model
    assert is_data_loaded(), 'Dataset hasn\'t been loaded'

    made_model = mades.GaussianMade(
        data.n_dims,
        n_hiddens,
        act_fun,
        mode=mode
    )

    # a_made is a module-level value handed to train for made models
    # NOTE(review): its meaning is not visible from here -- confirm against train()
    train(made_model, a_made)

    # NOTE(review): the trailing None and False fill positional slots of save_model whose
    # meaning is not visible from here -- confirm against save_model()
    save_model(made_model, 'made', mode, n_hiddens, act_fun, None, False)