def addLayer(self, n_units, type):
    """
    Adds a new layer to the network.
    :param n_units: number of units in the layer
    :param type: a string specification of the activation function
    """

    # check number of units
    assert util.isposint(n_units), 'Number of units must be a positive integer.'

    # choose activation function
    actfun = util.select_theano_act_function(type, dtype)

    n_prev_units = self.n_outputs
    self.n_outputs = n_units
    self.n_units.append(n_units)
    self.n_layers += 1
    self.n_params += (n_prev_units + 1) * n_units

    # new weight matrix and bias vector, weights scaled by 1/sqrt(fan-in + 1)
    W = theano.shared((rng.randn(n_prev_units, n_units) / np.sqrt(n_prev_units + 1)).astype(dtype), name='W' + str(self.n_layers), borrow=True)
    b = theano.shared(np.zeros(n_units, dtype=dtype), name='b' + str(self.n_layers), borrow=True)

    # stack the new layer on top of the last hidden layer
    h = actfun(tt.dot(self.hs[-1], W) + b)
    h.name = 'h' + str(self.n_layers)

    self.Ws.append(W)
    self.bs.append(b)
    self.hs.append(h)
    self.parms = self.Ws + self.bs
    self.output = self.hs[-1]

    # invalidate the compiled evaluation function; it is rebuilt when next needed
    self.eval_f = None
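# Usage sketch (a hypothetical illustration, not from this file): addLayer is a
# method of a feedforward-net class whose constructor initializes n_outputs,
# n_units, n_layers, n_params, Ws, bs and hs; the class name below is assumed.
#
#     net = FeedforwardNet(n_inputs=5)   # assumed constructor
#     net.addLayer(100, 'relu')          # first hidden layer, 100 units
#     net.addLayer(100, 'tanh')          # second hidden layer, stacked on the first
#
# Each call appends a fully connected layer on top of self.hs[-1] and resets
# self.eval_f, so the compiled theano function is recreated on the next evaluation.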
def __init__(self, n_inputs, n_outputs, n_hiddens, act_fun, n_comps,
             output_order='sequential', mode='sequential', input=None, output=None):
    """
    Constructor.
    :param n_inputs: number of (conditional) inputs
    :param n_outputs: number of outputs
    :param n_hiddens: list with number of hidden units for each hidden layer
    :param act_fun: name of activation function
    :param n_comps: number of Gaussian components
    :param output_order: order of outputs
    :param mode: strategy for assigning degrees to hidden nodes: can be 'random' or 'sequential'
    :param input: theano variable to serve as input; if None, a new variable is created
    :param output: theano variable to serve as output; if None, a new variable is created
    """

    # save input arguments
    self.n_inputs = n_inputs
    self.n_outputs = n_outputs
    self.n_hiddens = n_hiddens
    self.act_fun = act_fun
    self.n_comps = n_comps
    self.mode = mode

    # create network's parameters
    degrees = create_degrees(n_outputs, n_hiddens, output_order, mode)
    Ms, Mmp = create_masks(degrees)
    Mmp_broadcast = Mmp.dimshuffle([0, 1, 'x'])
    Wx, Ws, bs, Wm, bm, Wp, bp, Wa, ba = create_weights_conditional(n_inputs, n_outputs, n_hiddens, n_comps)
    self.parms = [Wx] + Ws + bs + [Wm, bm, Wp, bp, Wa, ba]
    self.output_order = degrees[0]

    # activation function
    f = util.select_theano_act_function(act_fun, dtype)

    # input matrices
    self.input = tt.matrix('x', dtype=dtype) if input is None else input
    self.y = tt.matrix('y', dtype=dtype) if output is None else output

    # feedforward propagation
    h = f(tt.dot(self.input, Wx) + tt.dot(self.y, Ms[0] * Ws[0]) + bs[0])
    h.name = 'h1'
    for l, (M, W, b) in enumerate(zip(Ms[1:], Ws[1:], bs[1:])):
        h = f(tt.dot(h, M * W) + b)
        h.name = 'h' + str(l + 2)

    # output means
    self.m = tt.tensordot(h, Mmp_broadcast * Wm, axes=[1, 0]) + bm
    self.m.name = 'm'

    # output log precisions
    self.logp = tt.tensordot(h, Mmp_broadcast * Wp, axes=[1, 0]) + bp
    self.logp.name = 'logp'

    # output mixing coefficients, normalized with a log-softmax over components
    self.loga = tt.tensordot(h, Mmp_broadcast * Wa, axes=[1, 0]) + ba
    self.loga -= tt.log(tt.sum(tt.exp(self.loga), axis=2, keepdims=True))
    self.loga.name = 'loga'

    # random numbers driving MADE
    self.u = tt.exp(0.5 * self.logp) * (self.y.dimshuffle([0, 1, 'x']) - self.m)

    # log likelihoods
    self.L = tt.log(tt.sum(tt.exp(self.loga - 0.5 * self.u ** 2 + 0.5 * self.logp), axis=2))
    self.L = -0.5 * n_outputs * np.log(2 * np.pi) + tt.sum(self.L, axis=1)
    self.L.name = 'L'

    # train objective
    self.trn_loss = -tt.mean(self.L)
    self.trn_loss.name = 'trn_loss'

    # theano evaluation functions, will be compiled when first needed
    self.eval_lprob_f = None
    self.eval_comps_f = None
    self.eval_us_f = None
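# Usage sketch (hedged; the class name below is assumed, everything else
# follows the constructor above). For each conditional input x, every output
# y_i gets a mixture of n_comps Gaussians with means m[:, i, :], precisions
# exp(logp[:, i, :]) and mixing weights exp(loga[:, i, :]); self.L is the
# resulting per-row log p(y|x), summed over outputs after a log-sum-exp over
# components.
#
#     made = ConditionalGaussianMadeMoG(n_inputs=3, n_outputs=5,      # assumed name
#                                       n_hiddens=[50, 50], act_fun='tanh',
#                                       n_comps=10)
#     lprob_f = theano.function([made.input, made.y], made.L)
#     # lprob_f(xs, ys) -> log p(y|x) for each row of (xs, ys)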
def __init__(self, n_inputs, n_hiddens, act_fun, input_order='sequential', mode='sequential', input=None):
    """
    Constructor.
    :param n_inputs: number of inputs
    :param n_hiddens: list with number of hidden units for each hidden layer
    :param act_fun: name of activation function
    :param input_order: order of inputs
    :param mode: strategy for assigning degrees to hidden nodes: can be 'random' or 'sequential'
    :param input: theano variable to serve as input; if None, a new variable is created
    """

    # save input arguments
    self.n_inputs = n_inputs
    self.n_hiddens = n_hiddens
    self.act_fun = act_fun
    self.mode = mode

    # create network's parameters
    degrees = create_degrees(n_inputs, n_hiddens, input_order, mode)
    Ms, Mmp = create_masks(degrees)
    Ws, bs, Wm, bm, Wp, bp = create_weights(n_inputs, n_hiddens, None)
    self.parms = Ws + bs + [Wm, bm, Wp, bp]
    self.input_order = degrees[0]

    # activation function
    f = util.select_theano_act_function(act_fun, dtype)

    # input matrix
    self.input = tt.matrix('x', dtype=dtype) if input is None else input
    h = self.input

    # feedforward propagation
    for l, (M, W, b) in enumerate(zip(Ms, Ws, bs)):
        h = f(tt.dot(h, M * W) + b)
        h.name = 'h' + str(l + 1)

    # output means
    self.m = tt.dot(h, Mmp * Wm) + bm
    self.m.name = 'm'

    # output log precisions
    self.logp = tt.dot(h, Mmp * Wp) + bp
    self.logp.name = 'logp'

    # random numbers driving MADE
    self.u = tt.exp(0.5 * self.logp) * (self.input - self.m)

    # log likelihoods
    self.L = -0.5 * (n_inputs * np.log(2 * np.pi) + tt.sum(self.u ** 2 - self.logp, axis=1))
    self.L.name = 'L'

    # train objective
    self.trn_loss = -tt.mean(self.L)
    self.trn_loss.name = 'trn_loss'

    # theano evaluation functions, will be compiled when first needed
    self.eval_lprob_f = None
    self.eval_comps_f = None
    self.eval_us_f = None
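# Usage sketch and a check of the likelihood (class name assumed). With log
# precisions logp and standardized residuals u = exp(0.5 * logp) * (x - m),
# each conditional is a single Gaussian, so
#
#     log p(x) = sum_i log N(x_i | m_i, exp(-logp_i))
#              = -0.5 * (n_inputs * log(2*pi) + sum_i (u_i**2 - logp_i)),
#
# which is exactly the expression assigned to self.L above.
#
#     made = GaussianMade(n_inputs=5, n_hiddens=[100, 100], act_fun='relu')  # assumed name
#     lprob_f = theano.function([made.input], made.L)
#     # lprob_f(xs) -> log p(x) for each row of xs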