def init_maf(self, n_mades=5, batch_norm=False, maf_actfun='tanh',
             output_order='random', maf_mode='random', **unused_kwargs):
    """Initialize a conditional masked autoregressive flow (MAF) as the
    density on top of the existing feature layers

    :param n_mades: int
        Number of MADEs in the flow
    :param batch_norm: bool
        Whether to use batch normalization (not implemented)
    :param maf_actfun: activation function for the MADE hidden units
    :param output_order: output order passed to the flow ('random' by default)
    :param maf_mode: mask-generation mode passed to the flow ('random' by default)
    :param unused_kwargs: ignored keyword arguments
    :return: None
    """
    if batch_norm:
        raise NotImplementedError  # why?
    self.n_mades, self.batch_norm, self.output_order, self.maf_mode = \
        n_mades, batch_norm, output_order, maf_mode
    self.maf_actfun = maf_actfun
    for key in unused_kwargs.keys():
        print("CMAF ignoring unused input {0}".format(key))

    # get previous output/params
    self.maf_input = ll.get_output(last(self.layer))
    prev_params = ll.get_all_params(last(self.layer))
    input_shape_cmaf = last(self.layer).output_shape
    assert len(input_shape_cmaf) == 2  # (batch, input_dim)
    n_inputs_cmaf = input_shape_cmaf[1]

    rng_maf = np.random.RandomState(seed=self.gen_newseed())
    self.cmaf = ConditionalMaskedAutoregressiveFlow(
        n_inputs=n_inputs_cmaf,
        n_outputs=self.n_outputs,
        n_hiddens=self.n_hiddens,
        act_fun=self.maf_actfun,
        n_mades=self.n_mades,
        batch_norm=self.batch_norm,
        output_order=self.output_order,
        mode=self.maf_mode,
        input=self.maf_input,
        output=self.params,
        rng=rng_maf)

    self.aps = prev_params + self.cmaf.parms
    self.lprobs = self.cmaf.L  # model log-likelihood
    self.dlprobs = self.lprobs  # svi not possible
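# Hedged usage sketch (not part of the original module): assuming the
# surrounding class is the network class whose __init__ appears below
# (referred to here as `NeuralNet`), the MAF-specific options above are
# forwarded through **density_opts when density='maf':
#
#     net = NeuralNet(n_inputs=10, n_outputs=3, n_hiddens=[50, 50],
#                     density='maf', n_mades=5, maf_actfun='tanh',
#                     output_order='random', maf_mode='random', seed=42)
#     # net.lprobs is then the conditional MAF log-likelihood (self.cmaf.L)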
def init_mdn(self, svi=False, n_components=1, rank=None, mdn_actfun=lnl.tanh,
             homoscedastic=False, min_precisions=None, **unused_kwargs):
    """Initialize the mixture-of-Gaussians (MoG) output layers

    :param svi: bool
        Whether to use SVI version or not
    :param n_components: int
        Number of components of the mixture density
    :param rank: int
        Rank of the precision-factor parametrization (None for full rank)
    :param mdn_actfun: lasagne nonlinearity
        Activation function for hidden units
    :param homoscedastic: bool
        Whether all components share a single precision matrix
    :param min_precisions:
        Minimum values for diagonal elements of precision matrix for all
        components (usually taken to be prior precisions)
    :param unused_kwargs: dict
        Ignored keyword arguments
    :return: None
    """
    self.svi, self.n_components, self.rank, self.mdn_actfun, \
        self.homoscedastic, self.min_precisions = \
        svi, n_components, rank, mdn_actfun, homoscedastic, min_precisions
    for key in unused_kwargs.keys():
        print("MDN ignoring unused input {0}".format(key))

    # hidden layers
    for l in range(len(self.n_hiddens)):
        self.layer['hidden_' + str(l + 1)] = dl.FullyConnectedLayer(
            last(self.layer), n_units=self.n_hiddens[l],
            actfun=self.mdn_actfun, svi=self.svi, name='h' + str(l + 1))
    last_hidden = last(self.layer)

    # mixture layers
    self.layer['mixture_weights'] = dl.MixtureWeightsLayer(
        last_hidden, n_units=self.n_components, actfun=lnl.softmax,
        svi=self.svi, name='weights')
    self.layer['mixture_means'] = dl.MixtureMeansLayer(
        last_hidden, n_components=self.n_components, n_dim=self.n_outputs,
        svi=self.svi, name='means')
    if self.homoscedastic:
        PrecisionsLayer = dl.MixtureHomoscedasticPrecisionsLayer
    else:
        PrecisionsLayer = dl.MixturePrecisionsLayer
    # why is homoscedastic an input to the layer init?
    self.layer['mixture_precisions'] = PrecisionsLayer(
        last_hidden, n_components=self.n_components, n_dim=self.n_outputs,
        svi=self.svi, name='precisions', rank=self.rank,
        homoscedastic=self.homoscedastic, min_precisions=min_precisions)

    last_mog = [self.layer['mixture_weights'],
                self.layer['mixture_means'],
                self.layer['mixture_precisions']]

    # mixture parameters
    # a : weights, matrix with shape (batch, n_components)
    # ms : means, list of len n_components with (batch, n_dim)
    # Us : precision factors, n_components list with (batch, n_dim, n_dim)
    # ldetUs : log determinants of precisions, n_comp list with (batch, )
    self.a, self.ms, precision_out = ll.get_output(last_mog,
                                                   deterministic=False)
    self.Us = precision_out['Us']
    self.ldetUs = precision_out['ldetUs']

    self.comps = {
        **{'a': self.a},
        **{'m' + str(i): self.ms[i] for i in range(self.n_components)},
        **{'U' + str(i): self.Us[i] for i in range(self.n_components)}}

    # log probability of y given the mixture distribution
    # lprobs_comps : log probs per component, list of len n_components with (batch, )
    # lprobs : log probs of mixture, (batch, )
    self.lprobs_comps = [
        -0.5 * tt.sum(tt.sum((self.params - m).dimshuffle([0, 'x', 1]) * U,
                             axis=2)**2, axis=1) + ldetU
        for m, U, ldetU in zip(self.ms, self.Us, self.ldetUs)]
    self.lprobs = (MyLogSumExp(tt.stack(self.lprobs_comps, axis=1)
                               + tt.log(self.a), axis=1)
                   - (0.5 * self.n_outputs * np.log(2 * np.pi))).squeeze()

    # the quantities from above again, but with deterministic=True
    # --- in the svi case, this will disable injection of randomness;
    # the mean of weights is used instead
    self.da, self.dms, dprecision_out = ll.get_output(last_mog,
                                                      deterministic=True)
    self.dUs = dprecision_out['Us']
    self.dldetUs = dprecision_out['ldetUs']

    self.dcomps = {
        **{'a': self.da},
        **{'m' + str(i): self.dms[i] for i in range(self.n_components)},
        **{'U' + str(i): self.dUs[i] for i in range(self.n_components)}}

    self.dlprobs_comps = [
        -0.5 * tt.sum(tt.sum((self.params - m).dimshuffle([0, 'x', 1]) * U,
                             axis=2)**2, axis=1) + ldetU
        for m, U, ldetU in zip(self.dms, self.dUs, self.dldetUs)]
    self.dlprobs = (MyLogSumExp(tt.stack(self.dlprobs_comps, axis=1)
                                + tt.log(self.da), axis=1)
                    - (0.5 * self.n_outputs * np.log(2 * np.pi))).squeeze()

    # parameters of network
    self.aps = ll.get_all_params(last_mog)  # all parameters
    self.mps = ll.get_all_params(last_mog, mp=True)  # means
    self.sps = ll.get_all_params(last_mog, sp=True)  # log stds

    # weight and bias parameter sets as separate lists
    self.mps_wp = ll.get_all_params(last_mog, mp=True, wp=True)
    self.sps_wp = ll.get_all_params(last_mog, sp=True, wp=True)
    self.mps_bp = ll.get_all_params(last_mog, mp=True, bp=True)
    self.sps_bp = ll.get_all_params(last_mog, sp=True, bp=True)
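# Hedged reference sketch (not part of the original module): a NumPy
# re-implementation of the mixture log-density assembled in init_mdn above,
# useful for sanity-checking the Theano graph. It assumes scipy is available
# and that `Us` are the precision factors with the same (batch, n_dim, n_dim)
# layout used above; the helper name is hypothetical.
def _mog_lprobs_numpy(params, a, ms, Us, ldetUs):
    """Mixture log-density: logsumexp over components minus the Gaussian
    normalization constant, mirroring self.lprobs."""
    import numpy as np
    from scipy.special import logsumexp
    n_dim = params.shape[1]
    lprobs_comps = []
    for m, U, ldetU in zip(ms, Us, ldetUs):
        # U @ (x - m) per batch element, then the squared Mahalanobis norm
        d = np.einsum('bij,bj->bi', U, params - m)
        lprobs_comps.append(-0.5 * np.sum(d ** 2, axis=1) + ldetU)
    stacked = np.stack(lprobs_comps, axis=1) + np.log(a)
    return logsumexp(stacked, axis=1) - 0.5 * n_dim * np.log(2 * np.pi)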
def __init__(self, n_inputs=None, n_outputs=None, input_shape=None,
             n_bypass=0, density='mog', n_hiddens=(10, 10),
             impute_missing=True, seed=None, n_filters=(), filter_sizes=3,
             pool_sizes=2, n_rnn=0, **density_opts):
    """Initialize a mixture density network with custom layers

    Parameters
    ----------
    n_inputs : int
        Total input dimensionality (data/summary stats)
    n_outputs : int
        Dimensionality of output (simulator parameters)
    input_shape : tuple
        Shape to which the data are reshaped before the CNN or RNN
    n_bypass : int
        Number of elements at the end of the input which bypass the CNN or RNN
    density : str
        Type of density conditioned on the network, can be 'mog' or 'maf'
    n_hiddens : list of ints
        Number of hidden units per fully connected layer
    impute_missing : bool
        If set to True, learns replacement value for NaNs, otherwise those
        inputs are set to zero
    seed : int or None
        If provided, random number generator will be seeded
    n_filters : list of ints
        Number of filters per convolutional layer
    filter_sizes : int or list of ints
        Filter size per convolutional layer
    pool_sizes : int or list of ints
        Pooling size per convolutional layer
    n_rnn : int
        Number of RNN units, 0 disables the RNN
    density_opts : dict
        Options for the density estimator, e.g. n_components (number of
        components of the mixture density) for density='mog'
    """
    if n_rnn > 0 and len(n_filters) > 0:
        raise NotImplementedError

    assert isint(n_inputs) and isint(n_outputs) \
        and n_inputs > 0 and n_outputs > 0

    self.density = density.lower()
    self.impute_missing = impute_missing
    self.n_hiddens = list(n_hiddens)
    self.n_outputs, self.n_inputs = n_outputs, n_inputs
    self.n_bypass = n_bypass
    self.n_rnn = n_rnn
    self.n_filters, self.filter_sizes, self.pool_sizes, n_cnn = \
        list(n_filters), filter_sizes, pool_sizes, len(n_filters)
    if type(self.filter_sizes) is int:
        self.filter_sizes = [self.filter_sizes for _ in range(n_cnn)]
    else:
        assert len(self.filter_sizes) >= n_cnn
    if type(self.pool_sizes) is int:
        self.pool_sizes = [self.pool_sizes for _ in range(n_cnn)]
    else:
        assert len(self.pool_sizes) >= n_cnn

    self.iws = tt.vector('iws', dtype=dtype)

    self.seed = seed
    if seed is not None:
        self.rng = np.random.RandomState(seed=seed)
    else:
        self.rng = np.random.RandomState()
    lasagne.random.set_rng(self.rng)

    self.input_shape = (n_inputs,) if input_shape is None else input_shape
    assert np.prod(self.input_shape) + self.n_bypass == self.n_inputs
    assert 1 <= len(self.input_shape) <= 3

    # params : output placeholder, (batch, self.n_outputs)
    self.params = tensorN(2, name='params', dtype=dtype)
    # stats : input placeholder, (batch, self.n_inputs)
    self.stats = tensorN(2, name='stats', dtype=dtype)

    # compose layers
    self.layer = collections.OrderedDict()

    # input layer, None indicates batch size not fixed at compile time
    self.layer['input'] = ll.InputLayer(
        (None, self.n_inputs), input_var=self.stats)

    # learn replacement values
    if self.impute_missing:
        self.layer['missing'] = dl.ImputeMissingLayer(
            last(self.layer), n_inputs=(self.n_inputs,))
    else:
        self.layer['missing'] = dl.ReplaceMissingLayer(
            last(self.layer), n_inputs=(self.n_inputs,))

    if self.n_bypass > 0 and (self.n_rnn > 0 or n_cnn > 0):
        last_layer = last(self.layer)
        bypass_slice = slice(self.n_inputs - self.n_bypass, self.n_inputs)
        direct_slice = slice(0, self.n_inputs - self.n_bypass)
        self.layer['bypass'] = ll.SliceLayer(last_layer, bypass_slice)
        self.layer['direct'] = ll.SliceLayer(last_layer, direct_slice)

    # reshape inputs prior to RNN or CNN step
    if self.n_rnn > 0 or n_cnn > 0:
        if len(n_filters) > 0 and len(self.input_shape) == 2:  # 1 channel
            rs = (-1, 1, *self.input_shape)
        else:
            if self.n_rnn > 0:
                assert len(self.input_shape) == 2  # time, dim
            else:
                assert len(self.input_shape) == 3  # channel, row, col
            rs = (-1, *self.input_shape)
        # last layer is 'missing' or 'direct'
        self.layer['reshape'] = ll.ReshapeLayer(last(self.layer), rs)

    # recurrent neural net, input: (batch, sequence_length, num_inputs)
    if self.n_rnn > 0:
        self.layer['rnn'] = ll.GRULayer(last(self.layer), n_rnn,
                                        only_return_final=True)

    # convolutional net, input: (batch, channels, rows, columns)
    if n_cnn > 0:
        for l in range(n_cnn):
            # add layers
            if self.pool_sizes[l] == 1:
                padding = (self.filter_sizes[l] - 1) // 2
            else:
                padding = 0
            self.layer['conv_' + str(l + 1)] = ll.Conv2DLayer(
                name='c' + str(l + 1),
                incoming=last(self.layer),
                num_filters=self.n_filters[l],
                filter_size=self.filter_sizes[l],
                stride=(1, 1),
                pad=padding,
                untie_biases=False,
                W=lasagne.init.GlorotUniform(),
                b=lasagne.init.Constant(0.),
                nonlinearity=lnl.rectify,
                flip_filters=True,
                convolution=tt.nnet.conv2d)
            if self.pool_sizes[l] > 1:
                self.layer['pool_' + str(l + 1)] = ll.MaxPool2DLayer(
                    name='p' + str(l + 1),
                    incoming=last(self.layer),
                    pool_size=self.pool_sizes[l],
                    stride=None,
                    ignore_border=True)

        # flatten
        self.layer['flatten'] = ll.FlattenLayer(
            incoming=last(self.layer), outdim=2)

    # incorporate bypass inputs
    if self.n_bypass > 0 and (self.n_rnn > 0 or n_cnn > 0):
        self.layer['bypass_merge'] = lasagne.layers.ConcatLayer(
            [self.layer['bypass'], last(self.layer)], axis=1)

    if self.density == 'mog':
        self.init_mdn(**density_opts)
    elif self.density == 'maf':
        self.init_maf(**density_opts)
    else:
        raise NotImplementedError

    self.compile_funs()  # theano functions
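# Hedged usage sketch (not part of the original module; the class name
# `NeuralNet` is an assumption): image-like summary statistics with two
# trailing bypass features that skip the CNN. Note the constraint
# np.prod(input_shape) + n_bypass == n_inputs enforced above.
#
#     net = NeuralNet(n_inputs=66, n_outputs=3,
#                     input_shape=(1, 8, 8),    # (channels, rows, cols)
#                     n_bypass=2,
#                     n_filters=(8, 8), filter_sizes=3, pool_sizes=2,
#                     n_hiddens=(50, 50), density='mog',
#                     n_components=2,           # forwarded to init_mdn
#                     seed=42)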
def __init__(self, n_inputs, n_outputs, n_components=1, n_filters=[],
             n_hiddens=[10, 10], n_rnn=None, impute_missing=True, seed=None,
             svi=True):
    """Initialize a mixture density network with custom layers

    Parameters
    ----------
    n_inputs : int or tuple of ints or list of ints
        Dimensionality of input
    n_outputs : int
        Dimensionality of output
    n_components : int
        Number of components of the mixture density
    n_filters : list of ints
        Number of filters per convolutional layer
    n_hiddens : list of ints
        Number of hidden units per fully connected layer
    n_rnn : None or int
        Number of RNN units
    impute_missing : bool
        If set to True, learns replacement value for NaNs, otherwise those
        inputs are set to zero
    seed : int or None
        If provided, random number generator will be seeded
    svi : bool
        Whether to use SVI version or not
    """
    self.impute_missing = impute_missing
    self.n_components = n_components
    self.n_filters = n_filters
    self.n_hiddens = n_hiddens
    self.n_outputs = n_outputs
    self.svi = svi

    self.iws = tt.vector('iws', dtype=dtype)

    if n_rnn is None:
        self.n_rnn = 0
    else:
        self.n_rnn = n_rnn
    if self.n_rnn > 0 and len(self.n_filters) > 0:
        raise NotImplementedError

    self.seed = seed
    if seed is not None:
        self.rng = np.random.RandomState(seed=seed)
    else:
        self.rng = np.random.RandomState()
    lasagne.random.set_rng(self.rng)

    # cast n_inputs to tuple
    if type(n_inputs) is int:
        self.n_inputs = (n_inputs, )
    elif type(n_inputs) is list:
        self.n_inputs = tuple(n_inputs)
    elif type(n_inputs) is tuple:
        self.n_inputs = n_inputs
    else:
        raise ValueError('n_inputs type not supported')

    # compose layers
    self.layer = collections.OrderedDict()

    # stats : input placeholder, (batch, *self.n_inputs)
    if len(self.n_inputs) + 1 == 2:
        self.stats = tt.matrix('stats', dtype=dtype)
    elif len(self.n_inputs) + 1 == 3:
        self.stats = tt.tensor3('stats', dtype=dtype)
    elif len(self.n_inputs) + 1 == 4:
        self.stats = tt.tensor4('stats', dtype=dtype)
    else:
        raise NotImplementedError

    # input layer
    self.layer['input'] = ll.InputLayer(
        (None, *self.n_inputs), input_var=self.stats)

    # learn replacement values
    if self.impute_missing:
        self.layer['missing'] = dl.ImputeMissingLayer(
            last(self.layer), n_inputs=self.n_inputs)
    else:
        self.layer['missing'] = dl.ReplaceMissingLayer(
            last(self.layer), n_inputs=self.n_inputs)

    # recurrent neural net
    # expects shape (batch, sequence_length, num_inputs)
    if self.n_rnn > 0:
        if len(self.n_inputs) == 1:
            rs = (-1, *self.n_inputs, 1)
            self.layer['rnn_reshape'] = ll.ReshapeLayer(
                last(self.layer), rs)
        self.layer['rnn'] = ll.GRULayer(last(self.layer), n_rnn,
                                        only_return_final=True)

    # convolutional layers
    # expects shape (batch, num_input_channels, input_rows, input_columns)
    if len(self.n_filters) > 0:
        # reshape
        if len(self.n_inputs) == 1:
            raise NotImplementedError
        elif len(self.n_inputs) == 2:
            rs = (-1, 1, *self.n_inputs)
        else:
            rs = None
        if rs is not None:
            self.layer['conv_reshape'] = ll.ReshapeLayer(
                last(self.layer), rs)

        # add layers
        for l in range(len(n_filters)):
            self.layer['conv_' + str(l + 1)] = ll.Conv2DLayer(
                name='c' + str(l + 1),
                incoming=last(self.layer),
                num_filters=n_filters[l],
                filter_size=3,
                stride=(2, 2),
                pad=0,
                untie_biases=False,
                W=lasagne.init.GlorotUniform(),
                b=lasagne.init.Constant(0.),
                nonlinearity=lnl.rectify,
                flip_filters=True,
                convolution=tt.nnet.conv2d)

        # flatten
        self.layer['flatten'] = ll.FlattenLayer(
            incoming=last(self.layer), outdim=2)

    # hidden layers
    for l in range(len(n_hiddens)):
        self.layer['hidden_' + str(l + 1)] = dl.FullyConnectedLayer(
            last(self.layer), n_units=n_hiddens[l], svi=svi,
            name='h' + str(l + 1))
    last_hidden = last(self.layer)

    # mixture layers
    self.layer['mixture_weights'] = dl.MixtureWeightsLayer(
        last_hidden, n_units=n_components, actfun=lnl.softmax, svi=svi,
        name='weights')
    self.layer['mixture_means'] = dl.MixtureMeansLayer(
        last_hidden, n_components=n_components, n_dim=n_outputs, svi=svi,
        name='means')
    self.layer['mixture_precisions'] = dl.MixturePrecisionsLayer(
        last_hidden, n_components=n_components, n_dim=n_outputs, svi=svi,
        name='precisions')

    last_mog = [self.layer['mixture_weights'],
                self.layer['mixture_means'],
                self.layer['mixture_precisions']]

    # output placeholder, (batch, self.n_outputs)
    self.params = tt.matrix('params', dtype=dtype)

    # mixture parameters
    # a : weights, matrix with shape (batch, n_components)
    # ms : means, list of len n_components with (batch, n_dim)
    # Us : precision factors, n_components list with (batch, n_dim, n_dim)
    # ldetUs : log determinants of precisions, n_comp list with (batch, )
    self.a, self.ms, precision_out = ll.get_output(last_mog,
                                                   deterministic=False)
    self.Us = precision_out['Us']
    self.ldetUs = precision_out['ldetUs']

    self.comps = {
        **{'a': self.a},
        **{'m' + str(i): self.ms[i] for i in range(self.n_components)},
        **{'U' + str(i): self.Us[i] for i in range(self.n_components)}}

    # log probability of y given the mixture distribution
    # lprobs_comps : log probs per component, list of len n_components with (batch, )
    # lprobs : log probs of mixture, (batch, )
    self.lprobs_comps = [
        -0.5 * tt.sum(tt.sum((self.params - m).dimshuffle([0, 'x', 1]) * U,
                             axis=2)**2, axis=1) + ldetU
        for m, U, ldetU in zip(self.ms, self.Us, self.ldetUs)]
    self.lprobs = (MyLogSumExp(tt.stack(self.lprobs_comps, axis=1)
                               + tt.log(self.a), axis=1)
                   - (0.5 * self.n_outputs * np.log(2 * np.pi))).squeeze()

    # the quantities from above again, but with deterministic=True
    # --- in the svi case, this will disable injection of randomness;
    # the mean of weights is used instead
    self.da, self.dms, dprecision_out = ll.get_output(last_mog,
                                                      deterministic=True)
    self.dUs = dprecision_out['Us']
    self.dldetUs = dprecision_out['ldetUs']

    self.dcomps = {
        **{'a': self.da},
        **{'m' + str(i): self.dms[i] for i in range(self.n_components)},
        **{'U' + str(i): self.dUs[i] for i in range(self.n_components)}}

    self.dlprobs_comps = [
        -0.5 * tt.sum(tt.sum((self.params - m).dimshuffle([0, 'x', 1]) * U,
                             axis=2)**2, axis=1) + ldetU
        for m, U, ldetU in zip(self.dms, self.dUs, self.dldetUs)]
    self.dlprobs = (MyLogSumExp(tt.stack(self.dlprobs_comps, axis=1)
                                + tt.log(self.da), axis=1)
                    - (0.5 * self.n_outputs * np.log(2 * np.pi))).squeeze()

    # parameters of network
    self.aps = ll.get_all_params(last_mog)  # all parameters
    self.mps = ll.get_all_params(last_mog, mp=True)  # means
    self.sps = ll.get_all_params(last_mog, sp=True)  # log stds

    # weight and bias parameter sets as separate lists
    self.mps_wp = ll.get_all_params(last_mog, mp=True, wp=True)
    self.sps_wp = ll.get_all_params(last_mog, sp=True, wp=True)
    self.mps_bp = ll.get_all_params(last_mog, mp=True, bp=True)
    self.sps_bp = ll.get_all_params(last_mog, sp=True, bp=True)

    # theano functions
    self.compile_funs()
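# Hedged usage sketch (not part of the original module; the class name
# `MDN` is an assumption): 2D summary statistics processed by stride-2
# convolutions, followed by fully connected layers and a 3-component MoG.
#
#     mdn = MDN(n_inputs=(32, 32),     # cast to tuple; stats is a tensor3
#               n_filters=[16, 16],    # two Conv2DLayers, filter_size=3
#               n_hiddens=[50, 50], n_components=3,
#               impute_missing=True, svi=True, seed=42)
#     # mdn.lprobs / mdn.dlprobs give the (stochastic / deterministic)
#     # mixture log-densities of mdn.params given mdn.stats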