def initialize_parameters(self, W_initialiser=Gaussian(std=0.01), b_initialiser=Gaussian(std=0.01)):
    """Re-initialise all parameters of the model in place.

    Weight-like parameters (``W``, ``V_mu``, ``V_sigma``, ``V_alpha``) are
    filled from ``W_initialiser`` and the bias-like parameters (``b_mu``,
    ``b_sigma``, ``b_alpha``) from ``b_initialiser``.  Afterwards ``b_sigma``
    is shifted by ``+1.0`` and ``activation_rescaling`` is reset to ones.

    Args:
        W_initialiser: object whose ``get_tensor(shape)`` supplies the new
            weight values.
        b_initialiser: object whose ``get_tensor(shape)`` supplies the new
            bias values.
    """
    def _draw(initialiser, param):
        # New value with the same shape as the parameter currently holds,
        # cast to the configured float type.
        return initialiser.get_tensor(param.get_value().shape).astype(floatX)

    for weight in (self.W, self.V_mu, self.V_sigma, self.V_alpha):
        weight.set_value(_draw(W_initialiser, weight))

    for bias in (self.b_mu, self.b_sigma, self.b_alpha):
        bias.set_value(_draw(b_initialiser, bias))

    # Offset the freshly drawn sigma biases by one.
    shifted_sigma = self.b_sigma.get_value() + 1.0
    self.b_sigma.set_value(shifted_sigma)

    unit_scale = np.ones(self.n_visible, dtype=floatX)
    self.activation_rescaling.set_value(unit_scale)
def initialize_parameters_from_dataset(self, dataset, W_initialiser=Gaussian(std=0.01), sample_size=10000):
    """Initialise the parameters, placing the mixture biases from a data sample.

    Hidden-layer parameters and the ``V_*`` output weights are filled from
    ``W_initialiser``.  The output biases are derived from the per-column
    minimum and maximum of a sample drawn from ``dataset``: for every
    column the means are spaced evenly strictly inside ``[min, max]``,
    every sigma bias equals that spacing, and the alpha biases are zero.

    Args:
        dataset: object providing ``sample_data(n)``; element ``[0]`` of the
            result is used as the data array.
        W_initialiser: object whose ``get_tensor(shape)`` supplies the new
            weight values.
        sample_size: value passed to ``dataset.sample_data``.
    """
    def _like(param):
        # Tensor from the initialiser, shaped like the parameter's value.
        return W_initialiser.get_tensor(param.get_value().shape)

    hidden_params = [self.Wflags, self.W1, self.b1]
    if self.n_layers > 1:
        # The stacked deeper-layer parameters are only touched for deep models.
        hidden_params += [self.Ws, self.bs]
    for param in hidden_params:
        param.set_value(_like(param))

    for param in (self.V_mu, self.V_sigma, self.V_alpha):
        param.set_value(_like(param).astype(floatX))

    alpha_bias = np.zeros(self.b_alpha.get_value().shape, dtype=floatX)
    mu_bias = np.zeros(self.b_mu.get_value().shape, dtype=floatX)
    sigma_bias = np.zeros(self.b_sigma.get_value().shape, dtype=floatX)

    sample = dataset.sample_data(sample_size)[0].astype(floatX)
    column_ranges = zip(sample.min(axis=0), sample.max(axis=0))
    # Assumes row i of b_mu / b_sigma holds the n_components values of
    # data column i -- TODO confirm against the parameter shapes.
    for dim, (low, high) in enumerate(column_ranges):
        step = (high - low) / (self.n_components + 1)
        mu_bias[dim] = np.arange(1, self.n_components + 1) * step + low
        sigma_bias[dim] = step

    self.b_alpha.set_value(alpha_bias)
    self.b_mu.set_value(mu_bias)
    self.b_sigma.set_value(sigma_bias)
def initialize_parameters_cover_domain(self, domains, W_initialiser=Gaussian(std=0.01)):
    """Initialise the parameters so the mixture components span given domains.

    Hidden-layer parameters and the ``V_*`` output weights are filled from
    ``W_initialiser``.  For the ``i``-th ``(low, high)`` pair in ``domains``
    the mean biases are spaced evenly strictly inside the interval, the
    sigma biases equal that spacing, and the alpha biases are zero.

    Args:
        domains: iterable of ``(low, high)`` pairs, consumed in order.
        W_initialiser: object whose ``get_tensor(shape)`` supplies the new
            weight values.
    """
    def _like(param):
        # Tensor from the initialiser, shaped like the parameter's value.
        return W_initialiser.get_tensor(param.get_value().shape)

    hidden_params = [self.Wflags, self.W1, self.b1]
    if self.n_layers > 1:
        # The stacked deeper-layer parameters are only touched for deep models.
        hidden_params += [self.Ws, self.bs]
    for param in hidden_params:
        param.set_value(_like(param))

    for param in (self.V_mu, self.V_sigma, self.V_alpha):
        param.set_value(_like(param).astype(floatX))

    alpha_bias = np.zeros(self.b_alpha.get_value().shape, dtype=floatX)
    mu_bias = np.zeros(self.b_mu.get_value().shape, dtype=floatX)
    sigma_bias = np.zeros(self.b_sigma.get_value().shape, dtype=floatX)

    # Assumes row i of b_mu / b_sigma holds the n_components values of
    # domain i -- TODO confirm against the parameter shapes.
    for dim, (low, high) in enumerate(domains):
        step = (high - low) / (self.n_components + 1)
        mu_bias[dim] = np.arange(1, self.n_components + 1) * step + low
        sigma_bias[dim] = step

    self.b_alpha.set_value(alpha_bias)
    self.b_mu.set_value(mu_bias)
    self.b_sigma.set_value(sigma_bias)
def create_from_smaller_NADE(cls, small_NADE, add_n_hiddens=1, W_initialiser=Gaussian(std=0.01), marginal=None):
    """Build a deeper model that reuses the parameters of ``small_NADE``.

    The new model has ``small_NADE.n_layers + add_n_hiddens`` layers.  The
    first layer and the existing deeper layers are copied from
    ``small_NADE``; the remaining layers and the output weights ``V`` are
    filled from ``W_initialiser``.

    Args:
        small_NADE: the model to grow from.
        add_n_hiddens: number of layers to add.
        W_initialiser: object whose ``get_tensor(shape)`` supplies values for
            the parameters that are not copied.
        marginal: if ``None`` the output bias ``c`` is copied from
            ``small_NADE``; otherwise ``c`` is set to
            ``-log((1 - marginal) / marginal)``.

    Returns:
        The newly constructed model.
    """
    n_visible = small_NADE.n_visible
    n_hidden = small_NADE.n_hidden
    n_layers = small_NADE.n_layers
    nonlinearity = small_NADE.parameters["nonlinearity"].get_name()
    model = cls(n_visible, n_hidden, n_layers + add_n_hiddens, nonlinearity)

    # The first layer is taken over unchanged.
    for name in ("Wflags", "W1", "b1"):
        getattr(model, name).set_value(getattr(small_NADE, name).get_value())

    # Start from freshly initialised deeper layers, then overwrite the
    # leading slices with the layers the smaller model already has.
    deep_W = W_initialiser.get_tensor(model.Ws.get_value().shape)
    deep_b = W_initialiser.get_tensor(model.bs.get_value().shape)
    n_copied = n_layers - 1
    if n_copied > 0:
        deep_W[:n_copied, :, :] = small_NADE.Ws.get_value()[:n_copied, :, :]
        deep_b[:n_copied, :] = small_NADE.bs.get_value()[:n_copied, :]
    model.Ws.set_value(deep_W)
    model.bs.set_value(deep_b)

    model.V.set_value(W_initialiser.get_tensor(model.V.get_value().shape))

    if marginal is not None:
        # NOTE(review): a marginal of exactly 0 or 1 makes this bias
        # infinite -- confirm callers never pass such values.
        output_bias = -np.log((1 - marginal) / marginal).astype(floatX)
    else:
        output_bias = small_NADE.c.get_value()
    model.c.set_value(output_bias)
    return model
def initialize_parameters_from_dataset(self, dataset, W_initialiser=Gaussian(std=0.01), sample_size=1000):
    """Initialise the parameters, placing the mixture biases from a data sample.

    ``activation_rescaling`` is reset to ones and ``W`` plus the ``V_*``
    output weights are filled from ``W_initialiser``.  The output biases are
    derived from the per-column minimum and maximum of a sample drawn from
    ``dataset``: for every column the means are spaced evenly strictly
    inside ``[min, max]``, every sigma bias is the natural log of that
    spacing, and the alpha biases are zero.

    Args:
        dataset: object providing ``sample_data(n)``; element ``[0]`` of the
            result is used as the data array.
        W_initialiser: object whose ``get_tensor(shape)`` supplies the new
            weight values.
        sample_size: value passed to ``dataset.sample_data``.  It needs a
            default because it follows a defaulted parameter; a bare
            ``sample_size`` here is a SyntaxError.
    """
    dtype = theano.config.floatX

    self.activation_rescaling.set_value(np.ones(self.n_visible, dtype=dtype))

    for param in (self.W, self.V_mu, self.V_sigma, self.V_alpha):
        fresh = W_initialiser.get_tensor(param.get_value().shape)
        param.set_value(fresh.astype(dtype))

    b_alpha = np.zeros(self.b_alpha.get_value().shape, dtype=dtype)
    b_mu = np.zeros(self.b_mu.get_value().shape, dtype=dtype)
    b_sigma = np.zeros(self.b_sigma.get_value().shape, dtype=dtype)

    data_sample = dataset.sample_data(sample_size)[0].astype(floatX)
    domains = zip(data_sample.min(axis=0), data_sample.max(axis=0))
    # Assumes row i of b_mu / b_sigma holds the n_components values of
    # data column i -- TODO confirm against the parameter shapes.
    for i, (low, high) in enumerate(domains):
        step = (high - low) / (self.n_components + 1)
        b_mu[i] = np.arange(1, self.n_components + 1) * step + low
        # NOTE(review): a constant column gives step == 0 and therefore
        # log(0) = -inf here -- confirm the data has no constant columns.
        b_sigma[i] = np.log(step)

    self.b_alpha.set_value(b_alpha)
    self.b_mu.set_value(b_mu)
    self.b_sigma.set_value(b_sigma)
def initialize_parameters(self, marginal, W_initialiser=Gaussian(std=0.01)):
    """Re-initialise all parameters of the model in place.

    Hidden-layer parameters and the output weights ``V`` are filled from
    ``W_initialiser``; the output bias ``c`` is set to
    ``-log((1 - marginal) / marginal)``.

    Args:
        marginal: values the output bias is computed from.
        W_initialiser: object whose ``get_tensor(shape)`` supplies the new
            weight values.
    """
    def _like(param):
        # Tensor from the initialiser, shaped like the parameter's value.
        return W_initialiser.get_tensor(param.get_value().shape)

    to_reset = [self.Wflags, self.W1, self.b1]
    if self.n_layers > 1:
        # The stacked deeper-layer parameters are only touched for deep models.
        to_reset += [self.Ws, self.bs]
    to_reset.append(self.V)
    for param in to_reset:
        param.set_value(_like(param))

    # NOTE(review): a marginal of exactly 0 or 1 makes this bias infinite --
    # confirm callers never pass such values.
    odds_against = (1 - marginal) / marginal
    self.c.set_value(-np.log(odds_against).astype(floatX))
def initialize_parameters_from_dataset(self, dataset, W_initialiser=Gaussian(std=0.01), sample_size=1000):
    """Initialise the parameters, deriving the output bias from a data sample.

    Hidden-layer parameters and the output weights ``V`` are filled from
    ``W_initialiser``.  The output bias ``c`` is set to
    ``-log((1 - m) / m)``, where ``m`` is the per-column mean of a sample
    drawn from ``dataset``.

    The means are clipped to ``[eps, 1 - eps]`` before the logit is taken.
    With a finite sample a column can easily be all zeros or all ones, and
    an unclipped mean of exactly 0 or 1 would yield an infinite bias.

    Args:
        dataset: object providing ``sample_data(n)``; element ``[0]`` of the
            result is used as the data array.
        W_initialiser: object whose ``get_tensor(shape)`` supplies the new
            weight values.
        sample_size: value passed to ``dataset.sample_data``.
    """
    def _like(param):
        # Tensor from the initialiser, shaped like the parameter's value.
        return W_initialiser.get_tensor(param.get_value().shape)

    to_reset = [self.Wflags, self.W1, self.b1]
    if self.n_layers > 1:
        # The stacked deeper-layer parameters are only touched for deep models.
        to_reset += [self.Ws, self.bs]
    to_reset.append(self.V)
    for param in to_reset:
        param.set_value(_like(param))

    data_sample = dataset.sample_data(sample_size)[0].astype(floatX)
    marginal = data_sample.mean(axis=0)
    # Keep the estimated marginals strictly inside (0, 1) so the logit
    # below stays finite; eps is the machine epsilon of the sample's dtype.
    eps = np.finfo(marginal.dtype).eps
    marginal = np.clip(marginal, eps, 1 - eps)
    self.c.set_value(-np.log((1 - marginal) / marginal).astype(floatX))
def initialize_parameters_cover_domain(self, domains, W_initialiser=Gaussian(std=0.01)):
    """Initialise the parameters so the mixture components span given domains.

    ``activation_rescaling`` is reset to ones and ``W`` plus the ``V_*``
    output weights are filled from ``W_initialiser``.  For the ``i``-th
    ``(low, high)`` pair in ``domains`` the mean biases are spaced evenly
    strictly inside the interval, the sigma biases equal that spacing, and
    the alpha biases are zero.

    Args:
        domains: iterable of ``(low, high)`` pairs, consumed in order.
        W_initialiser: object whose ``get_tensor(shape)`` supplies the new
            weight values.
    """
    unit_scale = np.ones(self.n_visible, dtype=floatX)
    self.activation_rescaling.set_value(unit_scale)

    for weight in (self.W, self.V_mu, self.V_sigma, self.V_alpha):
        fresh = W_initialiser.get_tensor(weight.get_value().shape)
        weight.set_value(fresh.astype(floatX))

    alpha_bias = np.zeros(self.b_alpha.get_value().shape, dtype=floatX)
    mu_bias = np.zeros(self.b_mu.get_value().shape, dtype=floatX)
    sigma_bias = np.zeros(self.b_sigma.get_value().shape, dtype=floatX)

    # Assumes row i of b_mu / b_sigma holds the n_components values of
    # domain i -- TODO confirm against the parameter shapes.
    for dim, (low, high) in enumerate(domains):
        step = (high - low) / (self.n_components + 1)
        mu_bias[dim] = np.arange(1, self.n_components + 1) * step + low
        sigma_bias[dim] = step

    self.b_alpha.set_value(alpha_bias)
    self.b_mu.set_value(mu_bias)
    self.b_sigma.set_value(sigma_bias)
def create_from_smaller_NADE(cls, small_NADE, add_n_hiddens=1, W_initialiser=Gaussian(std=0.01), domains=None):
    """Build a deeper model that reuses the parameters of ``small_NADE``.

    The new model has ``small_NADE.n_layers + add_n_hiddens`` layers.  The
    first layer and the existing deeper layers are copied from
    ``small_NADE``; the remaining layers and the ``V_*`` output weights are
    filled from ``W_initialiser``.

    Args:
        small_NADE: the model to grow from.
        add_n_hiddens: number of layers to add.
        W_initialiser: object whose ``get_tensor(shape)`` supplies values for
            the parameters that are not copied.
        domains: if ``None`` the output biases are copied from
            ``small_NADE``; otherwise an iterable of ``(low, high)`` pairs
            from which the mean and sigma biases are laid out evenly, with
            the alpha biases set to zero.

    Returns:
        The newly constructed model.
    """
    n_visible = small_NADE.n_visible
    n_hidden = small_NADE.n_hidden
    n_layers = small_NADE.n_layers
    n_components = small_NADE.n_components
    nonlinearity = small_NADE.parameters["nonlinearity"].get_name()
    model = cls(n_visible, n_hidden, n_layers + add_n_hiddens, n_components, nonlinearity)

    # The first layer is taken over unchanged.
    for name in ("Wflags", "W1", "b1"):
        getattr(model, name).set_value(getattr(small_NADE, name).get_value())

    # Start from freshly initialised deeper layers, then overwrite the
    # leading slices with the layers the smaller model already has.
    deep_W = W_initialiser.get_tensor(model.Ws.get_value().shape)
    deep_b = W_initialiser.get_tensor(model.bs.get_value().shape)
    n_copied = n_layers - 1
    if n_copied > 0:
        deep_W[:n_copied, :, :] = small_NADE.Ws.get_value()[:n_copied, :, :]
        deep_b[:n_copied, :] = small_NADE.bs.get_value()[:n_copied, :]
    model.Ws.set_value(deep_W)
    model.bs.set_value(deep_b)

    for weight in (model.V_mu, model.V_sigma, model.V_alpha):
        fresh = W_initialiser.get_tensor(weight.get_value().shape)
        weight.set_value(fresh.astype(floatX))

    if domains is None:
        # No domains supplied: reuse the smaller model's output biases.
        model.b_alpha.set_value(small_NADE.b_alpha.get_value())
        model.b_mu.set_value(small_NADE.b_mu.get_value())
        model.b_sigma.set_value(small_NADE.b_sigma.get_value())
        return model

    alpha_bias = np.zeros(model.b_alpha.get_value().shape, dtype=floatX)
    mu_bias = np.zeros(model.b_mu.get_value().shape, dtype=floatX)
    sigma_bias = np.zeros(model.b_sigma.get_value().shape, dtype=floatX)
    # Assumes row i of b_mu / b_sigma holds the n_components values of
    # domain i -- TODO confirm against the parameter shapes.
    for dim, (low, high) in enumerate(domains):
        step = (high - low) / (model.n_components + 1)
        mu_bias[dim] = np.arange(1, model.n_components + 1) * step + low
        sigma_bias[dim] = step
    model.b_alpha.set_value(alpha_bias)
    model.b_mu.set_value(mu_bias)
    model.b_sigma.set_value(sigma_bias)
    return model