def get_output_for(self, input, deterministic=False, **kwargs):
     if deterministic:
         # use stored mean and std
         mean = self.mean
         std = self.std
     else:
         # use this batch's mean and std
         mean = input.mean(self.axes, keepdims=True)
         std = input.std(self.axes, keepdims=True)
         # and update the stored mean and std:
         # we create (memory-aliased) clones of the stored mean and std
         running_mean = theano.clone(self.mean, share_inputs=False)
         running_std = theano.clone(self.std, share_inputs=False)
         # set a default update for them
         running_mean.default_update = (1 - self.alpha) * running_mean + self.alpha * mean
         running_std.default_update = (1 - self.alpha) * running_std + self.alpha * std
         # and include them in the graph so their default updates will be
         # applied (although the expressions will be optimized away later)
         mean += 0 * running_mean
         std += 0 * running_std
     std += self.epsilon
     mean = T.addbroadcast(mean, *self.axes)
     std = T.addbroadcast(std, *self.axes)
     beta = T.addbroadcast(self.beta, *self.axes)
     gamma = T.addbroadcast(self.gamma, *self.axes)
     normalized = (input - mean) * (gamma / std) + beta
     return self.nonlinearity(normalized)
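The comments above describe the usual Theano trick for updating running statistics from inside a graph: make a memory-aliased clone of the shared variable with theano.clone(..., share_inputs=False), attach a default_update to the clone, and add it to the output with a zero coefficient so the update is collected without changing the result. A minimal, self-contained sketch of just that trick (my own illustration with made-up variable names, not code from any of the examples here):

import numpy as np
import theano
import theano.tensor as T

x = T.dvector('x')
alpha = 0.1
running_mean = theano.shared(np.float64(0.0), name='running_mean')

# memory-aliased clone of the shared variable; theano.function collects the
# default_update attached to it because the clone appears in the output graph
running_mean_clone = theano.clone(running_mean, share_inputs=False)
running_mean_clone.default_update = ((1 - alpha) * running_mean_clone +
                                     alpha * x.mean())

# include the clone with a zero coefficient: the multiplication is optimized
# away, but the default update is still applied on every call
batch_mean = x.mean() + 0 * running_mean_clone

f = theano.function([x], batch_mean)
f(np.arange(10.0))                 # returns 4.5
print(running_mean.get_value())    # ~0.45: nudged toward 4.5 by alpha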
    def get_output_for(self, input, deterministic=False,
                       batch_norm_use_averages=None,
                       batch_norm_update_averages=None, **kwargs):

        self.count = self.count + 1
        self.alpha = 5.0 / (10 + self.count)
        # self.alpha = 1.0 / (self.count^2)

        input_mean = input.mean(self.axes)
        input_inv_std = T.inv(T.sqrt(input.var(self.axes) + self.epsilon))

        # Decide whether to use the stored averages or mini-batch statistics
        if batch_norm_use_averages is None:
            batch_norm_use_averages = deterministic
        use_averages = batch_norm_use_averages

        if use_averages:
            mean = self.mean
            inv_std = self.inv_std
        else:
            mean = input_mean
            inv_std = input_inv_std

        # Decide whether to update the stored averages
        if batch_norm_update_averages is None:
            batch_norm_update_averages = not deterministic
        update_averages = batch_norm_update_averages

        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_inv_std = theano.clone(self.inv_std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_inv_std.default_update = ((1 - self.alpha) *
                                              running_inv_std +
                                              self.alpha * input_inv_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean    
            inv_std += 0 * running_inv_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(input.ndim - len(self.axes)))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        inv_std = inv_std.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * inv_std) + beta
        return normalized
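The dimshuffle pattern built above re-inserts broadcastable axes so that the 1-d parameters line up with the normalized axes of the input. A tiny stand-alone illustration of what the list comprehension produces (my own example, not part of the code above):

# for a 4-d input normalized over axes (0, 2, 3), the per-channel parameters
# are 1-d and get broadcast back with the pattern ('x', 0, 'x', 'x')
axes = (0, 2, 3)
ndim = 4
param_axes = iter(range(ndim - len(axes)))
pattern = ['x' if input_axis in axes else next(param_axes)
           for input_axis in range(ndim)]
assert pattern == ['x', 0, 'x', 'x']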
Example #3
    def _init_exprs(self):
        # Here we need to replace the input with a corrupted version. If we do
        # so naively by calling clone on the loss, the targets (which are
        # identical to the inputs in the sense of identity in programming)
        # would be replaced as well. Instead, we only want the inputs replaced.
        # Thus we first clone the output of the model and replace the input
        # with the corrupted input. This will not change the targets.
        # Afterwards, we put that corruption into the loss as well.
        super(DenoisingAutoEncoder, self)._init_exprs()
        if self.noise_type == 'gauss':
            corrupted_inpt = corrupt.gaussian_perturb(
                self.exprs['inpt'], self.c_noise)
        elif self.noise_type == 'mask':
            corrupted_inpt = corrupt.mask(
                self.exprs['inpt'], self.c_noise)

        output_from_corrupt = theano.clone(
            self.exprs['output'],
            {self.exprs['inpt']: corrupted_inpt}
        )

        score = self.exprs['loss']
        loss = theano.clone(
            self.exprs['loss'],
            {self.exprs['output']: output_from_corrupt})

        self.exprs.update(get_named_variables(locals(), overwrite=True))
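A stripped-down sketch of the two-step replacement described in the comment above, using stand-in variables (inpt, output and loss below are hypothetical placeholders, not the breze expressions this example operates on): cloning only the output keeps the target untouched, and a second clone pushes the corrupted output into the loss.

import theano
import theano.tensor as T

inpt = T.matrix('inpt')
target = inpt                         # the target *is* the input (same variable)
output = T.tanh(inpt)                 # stand-in for the model's output expression
loss = ((output - target) ** 2).mean()

corrupted_inpt = inpt + 0.1           # stand-in for the corruption

# step 1: only the output sees the corrupted input; `target` still refers to `inpt`
output_from_corrupt = theano.clone(output, {inpt: corrupted_inpt})
# step 2: substitute the corrupted output into the loss
denoising_loss = theano.clone(loss, {output: output_from_corrupt})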
Example #4
    def apply_replacements(self, node, deterministic=False,
                           include=None, exclude=None,
                           more_replacements=None):
        """
        Replace variables in the graph with the variational approximation. By default, replaces all variables.

        Parameters
        ----------
        node : Theano Variables (or Theano expressions)
            node or nodes for replacements
        deterministic : bool
            whether to use zeros as initial distribution
            if True - zero initial point will produce constant latent variables
        include : list
            latent variables to be replaced
        exclude : list
            latent variables to be excluded for replacements
        more_replacements : dict
            add custom replacements to graph, e.g. change input source

        Returns
        -------
        node(s) with replacements
        """
        replacements = self.construct_replacements(
            include, exclude, more_replacements
        )
        node = theano.clone(node, replacements, strict=False)
        posterior = self.random(no_rand=deterministic)
        return theano.clone(node, {self.input: posterior}, strict=False)
Example #5
    def __init__(self, freq, activation, input, target_idx, task_loss, surrogate_loss,
                 hyperparameter, learning_rate, batch_generator, n_batches,
                 factor=1.5, n_updates=10):
        Extension.__init__(self, 'adapt_zloss', freq)

        self.batch_generator = batch_generator
        self.n_batches = n_batches
        self.learning_rate = learning_rate
        self.hyperparameter = hyperparameter
        self.factor = factor
        self.n_updates = n_updates

        # grad = theano.grad(surrogate_loss, activation)
        # new_activation = activation - learning_rate * grad
        self.fun_activation = theano.function([input], activation)

        activation_bis = tensor.matrix()
        surr_loss_bis = theano.clone(surrogate_loss,
                                     replace={activation: activation_bis})
        grad = theano.grad(surr_loss_bis, activation_bis)
        new_activation = activation_bis - 100*learning_rate * grad

        task_loss_bis = theano.clone(task_loss,
                                     replace={activation: new_activation})

        self.fun_update_task_loss = theano.function(
                [activation_bis, target_idx], [task_loss_bis, new_activation])
Example #6
    def _make_loss_functions(self, mode=None):
        """Return pair (f_loss, f_d_loss) of functions.

         - f_loss returns the current loss,
         - f_d_loss returns the gradient of that loss wrt parameters,
        """
        rng = T.shared_randomstreams.RandomStreams()

        # Drop out inpts.
        inpt = self.exprs['inpt']
        inpt_dropped_out = corrupt.mask(inpt, self.p_dropout_inpt, rng)
        givens = {inpt: inpt_dropped_out}
        loss = theano.clone(self.exprs['loss'], givens)

        n_layers = len(self.n_hiddens)
        for i in range(n_layers - 1):
            # Drop out hidden.
            hidden = self.exprs['hidden_%i' % i]
            hidden_dropped_out = corrupt.mask(hidden, self.p_dropout_hidden, rng)
            givens = {hidden: hidden_dropped_out}
            loss = theano.clone(loss, givens)

        d_loss = T.grad(loss, self.parameters.flat)

        f_loss = self.function(['inpt', 'target'], loss, explicit_pars=True,
                               mode=mode)
        f_d_loss = self.function(['inpt', 'target'], d_loss, explicit_pars=True,
                                 mode=mode)
        return f_loss, f_d_loss
Example #7
def filter_and_prob(inpt, transition, emission,
           visible_noise_mean, visible_noise_cov,
           hidden_noise_mean, hidden_noise_cov,
           initial_hidden, initial_hidden_cov):
    step = forward_step(
        transition, emission,
        visible_noise_mean, visible_noise_cov,
        hidden_noise_mean, hidden_noise_cov)

    hidden_mean_0 = T.zeros_like(hidden_noise_mean).dimshuffle('x', 0)
    hidden_cov_0 = T.zeros_like(hidden_noise_cov).dimshuffle('x', 0, 1)
    f0, F0, ll0 = step(inpt[0], hidden_mean_0, hidden_cov_0)
    replace = {hidden_noise_mean: initial_hidden, 
               hidden_noise_cov: initial_hidden_cov}
    f0 = theano.clone(f0, replace)
    F0 = theano.clone(F0, replace)
    ll0 = theano.clone(ll0, replace)

    (f, F, ll), _ = theano.scan(
        step,
        sequences=inpt[1:],
        outputs_info=[f0, F0, None])

    ll = ll.sum(axis=0)

    f = T.concatenate([T.shape_padleft(f0), f])
    F = T.concatenate([T.shape_padleft(F0), F])
    ll += ll0

    return f, F, ll
Example #8
    def forward(self,input_org,train=True,update_batch_stat=True,finetune=False):
        print "Layer/BatchNormalization"
        ldim,cdim,rdim = self._internal_shape(input_org)
        input = input_org.reshape((ldim,cdim,rdim))
        if (train):
            mean = T.mean(input, axis=(0, 2), keepdims=True )
            var = T.mean((input-mean)**2, axis=(0, 2), keepdims=True)

            if(update_batch_stat):
                finetune_N = theano.clone(self.finetune_N, share_inputs=False)
                if(finetune):
                    finetune_N.default_update = finetune_N+1
                    ratio = T.cast(1-1.0/(finetune_N+1),theano.config.floatX)
                else:
                    finetune_N.default_update = 0
                    ratio = self.moving_avg_ratio
                m = ldim*rdim
                scale = T.cast(m/(m-1.0),theano.config.floatX)
                est_mean = theano.clone(self.est_mean, share_inputs=False)
                est_var = theano.clone(self.est_var, share_inputs=False)
                est_mean.default_update = T.cast(ratio*self.est_mean + (1-ratio)*mean,theano.config.floatX)
                est_var.default_update = T.cast(ratio*self.est_var + (1-ratio)*scale*var,theano.config.floatX)
                mean += 0 * est_mean
                var += 0 * est_var
            output = self._pbc(self.gamma) * (input - self._pbc(mean)) \
                     / T.sqrt(1e-6+self._pbc(var)) + self._pbc(self.beta)

        else:
            output = self._pbc(self.gamma) * (input - self._pbc(self.est_mean)) \
                     / T.sqrt(1e-6+self._pbc(self.est_var)) + self._pbc(self.beta)

        return output.reshape(input_org.shape)
Example #9
 def __call__(self, z):
     if z.ndim > 1:
         a = theano.scan(
             lambda z_: theano.clone(self.op.apply(self.tf), {self.op.input: z_}, strict=False),
             sequences=z, n_steps=z.shape[0])[0].mean()
     else:
         a = theano.clone(self.op.apply(self.tf), {self.op.input: z}, strict=False)
     return tt.abs_(a)
Example #10
def safe_clone(cost, replace):
    params = replace.keys()
    nw_vals = replace.values()
    dummy_params = [x.type() for x in params]
    dummy_cost = theano.clone(cost,
                              replace=dict(zip(params, dummy_params)))
    return theano.clone(dummy_cost,
                        replace=dict(zip(dummy_params, nw_vals)))
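A hedged usage sketch for safe_clone above (my own example): going through dummy intermediates means the replacement expressions are spliced in untouched, presumably so that a replacement value may itself mention the variable it replaces.

import theano
import theano.tensor as T

x = T.vector('x')
cost = (x ** 2).sum()
# replace x by an expression that itself contains x
new_cost = safe_clone(cost, {x: x + 1})   # behaves like ((x + 1) ** 2).sum()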
Example #11
        def step(input, mask, cumsum_grad_att, extra_grad_h, h, h_pre, update, grad_h, C,
                 *prev_grad_params):
            """
            A single timestep of the backward pass.

            Parameters
            ----------
            input: (batch_size, n_in)
            mask: (batch_size,)
            cumsum_grad_att: (batch_size, n_hidden)
            h: (batch_size, n_hidden)
            h_pre: (batch_size, n_hidden)
            update: (batch_size, n_hidden)
            grad_h: (batch_size, n_hidden)
            C: (batch_size, n_hidden, n_hidden)
            *prev_grad_params

            Returns
            -------
            grad_input: (batch_size, n_in)
            grad_h_pre: (batch_size, n_hidden)
            C_pre: (batch_size, n_hidden, n_hidden)
            gradients with respect to the params (both of the recurrent and the
             update rule)
            """
            C_pre = self.attention_update_rule.restore_previous_matrix(C, update)

            att_grads = theano.clone(
                output=[u_grad_h] + u_grad_params,
                replace={u_h: h,
                         u_mask: mask,
                         u_C_pre: C_pre,
                         u_grad_att: cumsum_grad_att,
                         u_query: h})

            grad_h_att = att_grads[0]
            grad_params_att = att_grads[1:]

            grad_h_att *= 1000 / T.sum(seq_mask, axis=0)[:, None]
            grad_h_att = T.switch(mask[:, None], grad_h_att, .0)

            rec_grads = theano.clone(
                output=[back_grad_input, back_grad_h_pre] + back_grad_params,
                replace={back_input: input,
                         back_mask: mask,
                         back_h_pre: h_pre,
                         back_grad_h: extra_grad_h + grad_h + grad_h_att})

            grad_input = rec_grads[0]
            grad_h_pre = rec_grads[1]
            grad_params_rec = rec_grads[2:]

            grad_params = grad_params_att + grad_params_rec
            scan_outputs = [grad_input, grad_h_pre, C_pre]
            for prev_grad, grad in zip(prev_grad_params, grad_params):
                scan_outputs.append(prev_grad + grad)

            return tuple(scan_outputs)
Example #12
def _elbo_t_new(logp, uw_g, uw_l, inarray_g, inarray_l, 
                n_mcsamples, random_seed):
    """Return expression of approximate ELBO based on Monte Carlo sampling.
    """
    r = MRG_RandomStreams(seed=random_seed)

    if uw_l is not None:
        l_g = (uw_g.size/2).astype('int64')
        u_g = uw_g[:l_g]
        w_g = uw_g[l_g:]
        l_l = (uw_l.size/2).astype('int64')
        u_l = uw_l[:l_l]
        w_l = uw_l[l_l:]
        logp_ = lambda z_g, z_l: theano.clone(
            logp, {inarray_g: z_g, inarray_l: z_l}, strict=False
        )

        if n_mcsamples == 1:
            n_g = r.normal(size=inarray_g.tag.test_value.shape)
            z_g = n_g * tt.exp(w_g) + u_g
            n_l = r.normal(size=inarray_l.tag.test_value.shape)
            z_l = n_l * tt.exp(w_l) + u_l
            elbo = logp_(z_g, z_l) + \
                   tt.sum(w_g) + 0.5 * l_g * (1 + np.log(2.0 * np.pi)) + \
                   tt.sum(w_l) + 0.5 * l_l * (1 + np.log(2.0 * np.pi))
        else:
            ns_g = r.normal(size=inarray_g.tag.test_value.shape)
            zs_g = ns_g * tt.exp(w_g) + u_g
            ns_l = r.normal(size=inarray_l.tag.test_value.shape)
            zs_l = ns_l * tt.exp(w_l) + u_l
            logps, _ = theano.scan(fn=lambda z_g, z_l: logp_(z_g, z_l),
                                   outputs_info=None,
                                   sequences=zip(zs_g, zs_l))
            elbo = tt.mean(logps) + \
                   tt.sum(w_g) + 0.5 * l_g * (1 + np.log(2.0 * np.pi)) + \
                   tt.sum(w_l) + 0.5 * l_l * (1 + np.log(2.0 * np.pi))
    else:
        l_g = (uw_g.size/2).astype('int64')
        u_g = uw_g[:l_g]
        w_g = uw_g[l_g:]

        logp_ = lambda z_g: theano.clone(logp, {inarray_g: z_g}, strict=False)

        if n_mcsamples == 1:
            n_g = r.normal(size=inarray_g.tag.test_value.shape)
            z_g = n_g * tt.exp(w_g) + u_g
            elbo = logp_(z_g) + \
                   tt.sum(w_g) + 0.5 * l_g * (1 + np.log(2.0 * np.pi))
        else:
            n_g = r.normal(size=(n_mcsamples, u_g.tag.test_value.shape[0]))
            zs_g = n_g * tt.exp(w_g) + u_g
            logps, _ = theano.scan(fn=lambda q: logp_(q),
                                   outputs_info=None,
                                   sequences=[zs_g])
            elbo = tt.mean(logps) + \
                   tt.sum(w_g) + 0.5 * l_g * (1 + np.log(2.0 * np.pi))

    return elbo
    def get_output_for(self, input, deterministic=False, **kwargs):
        input_mean = input.mean(self.axes)
        input_var = input.var(self.axes)

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = kwargs.get('batch_norm_use_averages',
                                  deterministic)
        if use_averages:
            mean = self.mean
            var = self.var
        else:
            mean = input_mean
            var = input_var

        # Decide whether to update the stored averages
        update_averages = kwargs.get('batch_norm_update_averages',
                                     not deterministic)
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_var = theano.clone(self.var, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_var.default_update = ((1 - self.alpha) * running_var +
                                          self.alpha * input_var)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            var += 0 * running_var

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(self.beta.ndim))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = self.beta.dimshuffle(pattern)
        gamma = self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        std = T.sqrt(var + self.epsilon)
        std = std.dimshuffle(pattern)

        # normalize
        # normalized = (input - mean) * (gamma / std) + beta
        normalized = T.nnet.batch_normalization(input, gamma=gamma, beta=beta,
                                                mean=mean, std=std,
                                                mode=self.mode)
        return self.nonlinearity(normalized)
Example #14
    def get_output_for(self, input, deterministic=False, collect=False,
                       **kwargs):

        if collect:
            # use this batch's mean and var
            if self.stat_indices is None:
                mean = input.mean(self.axes, keepdims=True)
                var = input.var(self.axes, keepdims=True)
            else:
                mean = input[self.stat_indices].mean(self.axes, keepdims=True)
                var = input[self.stat_indices].var(self.axes, keepdims=True)
            # and update the stored mean and var:
            # we create (memory-aliased) clones of the stored mean and var
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_var = theano.clone(self.var, share_inputs=False)
            # set a default update for them

            if self.alpha != 'single_pass':
                running_mean.default_update = (
                    (1 - self.alpha) * running_mean + self.alpha * mean)
                running_var.default_update = (
                    (1 - self.alpha) * running_var + self.alpha * var)
            else:
                print "Collecting using single pass..."
                # this is ugly; figure out what can be safely removed...
                running_mean.default_update = (0 * running_mean + 1.0 * mean)
                running_var.default_update = (0 * running_var + 1.0 * var)

            # and include them in the graph so their default updates will be
            # applied (although the expressions will be optimized away later)
            mean += 0 * running_mean
            var += 0 * running_var

        elif deterministic:
            # use stored mean and var
            mean = self.mean
            var = self.var
        else:
            # use this batch's mean and var
            mean = input.mean(self.axes, keepdims=True)
            var = input.var(self.axes, keepdims=True)

        mean = T.addbroadcast(mean, *self.axes)
        var = T.addbroadcast(var, *self.axes)
        normalized = (input - mean) / T.sqrt(var + self.epsilon)

        if self.return_stats:
            return [normalized, mean, var]
        else:
            return normalized
Example #15
    def _apply(self, x):
        import theano

        input_shape = K.shape(x)
        is_training = K.is_training(x)
        ndim = K.ndim(x)
        self.config(input_shape=input_shape)
        # ====== training mode ====== #
        input_mean = K.mean(x, self.axes)
        input_inv_std = K.inv(K.sqrt(K.var(x, self.axes) + self.epsilon))

        # Decide whether to use the stored averages or mini-batch statistics
        if not is_training:
            mean = self.mean
            inv_std = self.inv_std
        else: # update the stored averages
            mean = input_mean
            inv_std = input_inv_std
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_inv_std = theano.clone(self.inv_std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_inv_std.default_update = ((1 - self.alpha) *
                                              running_inv_std +
                                              self.alpha * input_inv_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            inv_std += 0 * running_inv_std
        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(ndim - len(self.axes)))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else K.dimshuffle(self.beta, pattern)
        gamma = 1 if self.gamma is None else K.dimshuffle(self.gamma, pattern)
        mean = K.dimshuffle(mean, pattern)
        inv_std = K.dimshuffle(inv_std, pattern)

        # normalize
        normalized = (x - mean) * (gamma * inv_std) + beta
        # set shape for output
        K.add_shape(normalized, input_shape)
        return self.activation(normalized)
Example #16
 def set_size_and_deterministic(self, node, s, d):
     initial_local = self._initial_part_matrix('local', s, d)
     initial_global = self._initial_part_matrix('global', s, d)
     # optimizations
     if isinstance(s, int) and (s == 1) or s is None:
         node = theano.clone(node, {
             self.logp: self.single_symbolic_logp
         })
     out = theano.clone(node, {
         self.symbolic_initial_local_matrix: initial_local,
         self.symbolic_initial_global_matrix: initial_global,
     })
     try_to_set_test_value(node, out, None)
     return out
Example #17
 def clone(self, **new_inputs):
     new_obj = utils.copy(self)
     # Reorder inputs
     assert len(new_obj.inputs) == len(new_inputs.items())
     pairs = [(x, new_inputs[x.name]) for x in new_obj.inputs]
     new_obj.inputs = new_inputs.values()
     new_obj.out = theano.clone(new_obj.out, replace=pairs)
     if hasattr(new_obj, 'cost'):
         new_obj.cost = theano.clone(new_obj.cost, replace=pairs)
     if hasattr(new_obj, 'grads'):
         new_obj.grads = theano.clone(new_obj.grads, replace=pairs)
     if hasattr(new_obj, 'sample'):
         new_obj.sample = theano.clone(new_obj.sample, replace=pairs)
     return new_obj
Example #18
 def clone(self, **new_inputs):
     new_obj = utils.copy(self)
     # Reorder inputs
     assert len(new_obj.inputs) == len(new_inputs.items())
     # TODO: error with the `inputs` arg here; the missing self argument was corrected, but this method must not be used
     pairs = [(x, new_inputs[x.name]) for x in inputs]
     new_obj.inputs = new_inputs.values()
     new_obj.out = theano.clone(new_obj.out, replace=pairs)
     if hasattr(new_obj, 'cost'):
         new_obj.cost = theano.clone(new_obj.cost, replace=pairs)
     if hasattr(new_obj, 'grads'):
         new_obj.grads = theano.clone(new_obj.grads, replace=pairs)
     if hasattr(new_obj, 'sample'):
         new_obj.sample = theano.clone(new_obj.sample, replace=pairs)
     return new_obj
Example #19
 def single_symbolic_logp(self):
     logp = self.to_flat_input(self.model.logpt)
     loc = self.symbolic_random_local_matrix[0]
     glob = self.symbolic_random_global_matrix[0]
     iloc = self.local_input
     iglob = self.global_input
     return theano.clone(logp, {iloc: loc, iglob: glob})
Example #20
 def symbolic_log_q_W_local(self):
     mu, rho = self.__local_mu_rho
     mu = self.scale_grad(mu)
     rho = self.scale_grad(rho)
     z = self.symbolic_random_local_matrix
     logp = log_normal(z, mu, rho=rho)
     if self.local_size == 0:
         scaling = tt.constant(1, mu.dtype)
     else:
         scaling = []
         for var in self.local_vars:
             scaling.append(tt.repeat(var.scaling, var.dsize))
         scaling = tt.concatenate(scaling)
     # we need only dimensions here
     # from incoming unobserved
     # to get rid of input_view
     # I replace it with the first row
     # of total_random matrix
     # that always exists
     scaling = self.to_flat_input(scaling)
     scaling = theano.clone(
         scaling, {
             self.local_input: self.symbolic_random_local_matrix[0],
             self.global_input: self.symbolic_random_global_matrix[0]
         })
     logp *= scaling
     logp = logp.sum(1)
     return logp  # shape (s,)
Example #21
    def logp(self, z):
        factors = ([tt.sum(var.logpt) for var in self.model.basic_RVs] +
                   [tt.sum(var) for var in self.model.potentials])

        p = self.approx.to_flat_input(tt.add(*factors))
        p = theano.clone(p, {self.input: z})
        return p
Example #22
 def test_cloning_available(self):
     gop = generator(integers())
     res = gop**2
     shared = theano.shared(floatX(10))
     res1 = theano.clone(res, {gop: shared})
     f = theano.function([], res1)
     assert f() == np.float32(100)
Example #23
    def sample_node(self, node, size=100, more_replacements=None):
        """
        Samples the given node or nodes over the shared posterior

        Parameters
        ----------
        node : Theano Variables (or Theano expressions)
        size : scalar
            number of samples
        more_replacements : dict
            add custom replacements to graph, e.g. change input source

        Returns
        -------
        sampled node(s) with replacements
        """
        if more_replacements is not None:  # pragma: no cover
            node = theano.clone(node, more_replacements, strict=False)
        posterior = self.random(size)
        node = self.to_flat_input(node)

        def sample(z):
            return theano.clone(node, {self.input: z}, strict=False)

        nodes, _ = theano.scan(sample, posterior, n_steps=size)
        return nodes
Example #24
def fuse(building_blocks, fuse_dim=4, input_variables=None, entry_expression=None,
         output_expressions=-1, input_dtype='float32'):

    num_blocks = len(building_blocks)

    if isinstance(output_expressions, numbers.Number):
        output_expressions = [output_expressions]

    # account for indices -1, -2 etc
    output_expressions = [oe % num_blocks for oe in output_expressions]

    if fuse_dim == 4:
        fuse_block = T.tensor4
    else:
        fuse_block = T.matrix

    if input_variables is None and entry_expression is None:
        input_variables = fuse_block(dtype=input_dtype)
        entry_expression = input_variables

    current_expression = entry_expression
    outputs = []

    for i, block in enumerate(building_blocks):
        if not hasattr(block, "expression_"):
            block._build_expression()
        current_expression = theano.clone(
            block.expression_,
            replace={block.input_: current_expression},
            strict=False)
        if i in output_expressions:
            outputs.append(current_expression)

    return outputs, input_variables
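A small usage sketch for fuse() above, with a hypothetical building block that provides the attributes the function expects (input_, expression_ and _build_expression come from the code above; the DoubleBlock class itself is made up):

import theano
import theano.tensor as T

class DoubleBlock(object):
    # minimal stand-in for a building block with a buildable expression
    def _build_expression(self):
        self.input_ = T.matrix('block_input', dtype='float32')
        self.expression_ = self.input_ * 2

blocks = [DoubleBlock(), DoubleBlock()]
outputs, input_var = fuse(blocks, fuse_dim=2, output_expressions=[-1])
f = theano.function([input_var], outputs[0])   # the chained graph computes x * 4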
Example #25
    def gradIminibatch_srng(self, x, srng, num_samples, model_type='iwae'):
        # rep_x = T.extra_ops.repeat(x, num_samples, axis=0)
        rep_x = t_repeat(x, num_samples, axis=0)  # works marginally faster than theano's T.extra_ops.repeat
        q_samples = self.q_samplesIx_srng(rep_x, srng)

        log_ws = self.log_weightsIq_samples(q_samples)

        log_ws_matrix = log_ws.reshape((x.shape[0], num_samples))
        log_ws_minus_max = log_ws_matrix - T.max(log_ws_matrix, axis=1, keepdims=True)
        ws = T.exp(log_ws_minus_max)
        ws_normalized = ws / T.sum(ws, axis=1, keepdims=True)
        ws_normalized_vector = T.reshape(ws_normalized, log_ws.shape)

        dummy_vec = T.vector(dtype=theano.config.floatX)

        if model_type in ['vae', 'VAE']:
            print "Training a VAE"
            return collections.OrderedDict([(
                                             param,
                                             T.grad(T.sum(log_ws)/T.cast(num_samples, log_ws.dtype), param)
                                            )
                                            for param in self.params])
        else:
            print "Training an IWAE"
            return collections.OrderedDict([(
                                             param,
                                             theano.clone(
                                                T.grad(T.dot(log_ws, dummy_vec), param),
                                                replace={dummy_vec: ws_normalized_vector})
                                            )
                                            for param in self.params])
Example #26
 def __call__(self, z, **kwargs):
     if 'more_tf_params' in kwargs:
         m = -1
     else:
         m = 1
     if z.ndim > 1:
         a = theano.scan(
             lambda z_: theano.clone(
                 self.op.apply(self.tf),
                 {self.op.input: z_}, strict=False),
             sequences=z, n_steps=z.shape[0])[0].mean()
     else:
         a = theano.clone(
             self.op.apply(self.tf),
             {self.op.input: z}, strict=False)
     return m * self.op.T(a)
Example #27
 def test_cloning_available(self):
     gop = pm.Minibatch(np.arange(100), 1)
     res = gop**2
     shared = theano.shared(np.array([10]))
     res1 = theano.clone(res, {gop: shared})
     f = theano.function([], res1)
     assert f() == np.array([100])
Example #28
def test_gt_grad():
    """A user test that failed.

    Something about it made Elemwise.grad return something that was
    too complicated for get_scalar_constant_value to recognize as being 0, so
    gradient.grad reported that it was not a valid gradient of an
    integer.

    """
    floatX = config.floatX
    T = theano.tensor

    input_ = T.vector(dtype=floatX)
    random_values = numpy.random.RandomState(1234).uniform(low=-1,
                                                           high=1,
                                                           size=(2, 2))
    W_values = numpy.asarray(random_values, dtype=floatX)
    W = theano.shared(value=W_values, name='weights')
    correct_score = T.dot(input_, W)
    wrong_input = T.vector(dtype=floatX)
    wrong_score = theano.clone(correct_score, {input_: wrong_input})
    # Hinge loss

    scores = T.ones_like(correct_score) - correct_score + wrong_score
    cost = (scores * (scores > 0)).sum()
    T.grad(cost, input_)
Example #29
    def score_function(self,
                       sc_n_mc=None,
                       more_replacements=None,
                       fn_kwargs=None):  # pragma: no cover
        R"""Compiles scoring function that operates which takes no inputs and returns Loss

        Parameters
        ----------
        sc_n_mc : `int`
            number of scoring MC samples
        more_replacements:
            Apply custom replacements before compiling a function
        fn_kwargs: `dict`
            arbitrary kwargs passed to theano.function

        Returns
        -------
        theano.function
        """
        if fn_kwargs is None:
            fn_kwargs = {}
        if not self.op.RETURNS_LOSS:
            raise NotImplementedError('%s does not have loss' % self.op)
        if more_replacements is None:
            more_replacements = {}
        loss = theano.clone(self(sc_n_mc), more_replacements, strict=False)
        return theano.function([], loss, **fn_kwargs)
Example #30
 def logp_(z_g, z_l):
     return theano.clone(logp,
                         OrderedDict({
                             inarray_g: z_g,
                             inarray_l: z_l
                         }),
                         strict=False)
Example #31
def _elbo_t(logp, uw, inarray, n_mcsamples, random_seed):
    """Create Theano tensor of approximate ELBO by Monte Carlo sampling.
    """
    l = (uw.size / 2).astype('int64')
    u = uw[:l]
    w = uw[l:]

    # Callable tensor
    logp_ = lambda input: theano.clone(logp, {inarray: input}, strict=False)

    # Naive Monte-Carlo
    if random_seed is None:
        r = MRG_RandomStreams(gen_random_state())
    else:
        r = MRG_RandomStreams(seed=random_seed)

    if n_mcsamples == 1:
        n = r.normal(size=inarray.tag.test_value.shape)
        q = n * tt.exp(w) + u
        elbo = logp_(q) + tt.sum(w) + 0.5 * l * (1 + np.log(2.0 * np.pi))
    else:
        n = r.normal(size=(n_mcsamples, u.tag.test_value.shape[0]))
        qs = n * tt.exp(w) + u
        logps, _ = theano.scan(fn=lambda q: logp_(q),
                               outputs_info=None,
                               sequences=[qs])
        elbo = tt.mean(logps) + tt.sum(w) + 0.5 * l * (1 + np.log(2.0 * np.pi))

    return elbo
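For reference, the constant added to the Monte Carlo term above is the entropy of the diagonal Gaussian approximation q with mean u and log-standard-deviation w; written out (my reading of the code, not text from the original source):

\[
H[q] = \sum_{i=1}^{l} w_i + \frac{l}{2}\left(1 + \log 2\pi\right),
\qquad
\widehat{\mathrm{ELBO}} = \frac{1}{S}\sum_{s=1}^{S} \log p\!\left(u + e^{w} \odot n^{(s)}\right) + H[q],
\quad n^{(s)} \sim \mathcal{N}(0, I).
\]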
Example #32
 def __init__(self, rng, P_input, L2_input, **kwargs):
     #symbol declaration, initialization and definition
     x_1_tm1, x_t = (\
             sparse.csr_matrix("x_1_tm1", dtype=theano.config.floatX),\
             sparse.csr_matrix("x_t",dtype=theano.config.floatX)\
         )\
         if P_input is None else P_input[:2]
     
     #elements of history
     shape = kwargs.get("shape")
     if shape is not None:
         dict_size = shape[0]
         if len(shape) <= 1:
             del shape["shape"]
         else:
             shape["shape"] = shape["shape"][1:]
     else:
         dict_size = (16,1,32,32)
     D_1_tm1 = theano.shared(rng.normal(size=dict_size).astype(theano.config.floatX))        
     Dx_1_tm1 = sparse.dot(x_1_tm1, D_1_tm1)#array access=dot operation      
     super(SequenceCNN, self).__init__(rng=rng, inputsymbol=Dx_1_tm1, **kwargs)#attaches new elements into the fgraph
     self.L2_output_1_tm1 = self.L2_output
     
     #elements of current time
     D_t = theano.shared(rng.normal(size=dict_size).astype(theano.config.floatX))        
     Dx_t = sparse.dot(x_t, D_t)#array access=dot operation
     self.L2_output_t = theano.clone(self.L2_output_1_tm1, replace={Dx_1_tm1:Dx_t})
     
     # element preparation for model building
     self.P_input = (x_1_tm1,x_t)
     self.params += [D_1_tm1, D_t]
     self.L2_output = self.L2_output_1_tm1*self.L2_output_t
Example #33
    def check_mat_rop_lop(self, y, out_shape):
        vx = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX)
        vv = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX)
        yv = tensor.Rop(y, self.mx, self.mv)
        rop_f = function([self.mx, self.mv], yv)
        sy, _ = theano.scan( lambda i,y,x,v: (tensor.grad(y[i],x)*v).sum(),
                           sequences = tensor.arange(y.shape[0]),
                           non_sequences = [y,self.mx,self.mv])
        scan_f = function([self.mx,self.mv], sy)


        v1 = rop_f(vx,vv)
        v2 = scan_f(vx,vv)

        assert numpy.allclose(v1,v2), ('ROP mismatch: %s %s' % (v1, v2))

        self.check_nondiff_rop( theano.clone(y,
                                             replace={self.mx:break_op(self.mx)}))

        vv = numpy.asarray(self.rng.uniform(size=out_shape), theano.config.floatX)
        yv = tensor.Lop(y, self.mx, self.v)
        lop_f = function([self.mx, self.v], yv)

        sy = tensor.grad((self.v*y).sum(), self.mx)
        scan_f = function([self.mx, self.v], sy)


        v1 = lop_f(vx,vv)
        v2 = scan_f(vx,vv)
        assert numpy.allclose(v1,v2), ('LOP mismatch: %s %s' % (v1, v2))
Example #34
def _elbo_t(logp, uw, inarray, n_mcsamples, random_seed):
    """Create Theano tensor of approximate ELBO by Monte Carlo sampling.
    """
    l = (uw.size / 2).astype('int64')
    u = uw[:l]
    w = uw[l:]

    # Callable tensor
    logp_ = lambda input: theano.clone(logp, {inarray: input}, strict=False)

    # Naive Monte-Carlo
    r = MRG_RandomStreams(seed=random_seed)

    if n_mcsamples == 1:
        n = r.normal(size=inarray.tag.test_value.shape)
        q = n * exp(w) + u
        elbo = logp_(q) + tt.sum(w) + 0.5 * l * (1 + np.log(2.0 * np.pi))
    else:
        n = r.normal(size=(n_mcsamples, u.tag.test_value.shape[0]))
        qs = n * exp(w) + u
        logps, _ = theano.scan(fn=lambda q: logp_(q),
                               outputs_info=None,
                               sequences=[qs])
        elbo = tt.mean(logps) + tt.sum(w) + 0.5 * l * (1 + np.log(2.0 * np.pi))

    return elbo
Example #35
    def get_output_for(self, input, deterministic=False, **kwargs):
        """ Binary dense layer dot product computation
        """
        if(self.xnor):
            # binarize the input
            bin_input, beta = binarize_fc_input(input)

            # compute weight scaling factor.
            self.Wb, alpha = binarize_fc_weights(self.W)
            if not deterministic:
                old_alpha = theano.clone(self.xalpha, share_inputs=False)
                old_alpha.default_update = alpha
                alpha += 0*old_alpha
            else:
                alpha = self.xalpha

            #W_full_precision = self.Wb * alpha.dimshuffle('x', 0)
            Wr = self.W
            self.W = self.Wb
                
            fc_out = super(DenseLayer, self).get_output_for(bin_input, **kwargs)
            # scale the output by alpha and beta
            # FIXME: Actually we are scaling after adding the bias here. Need to scale first and then add the bias.
            # The super class method automatically adds the bias, so we somehow need to work around this:
            # maybe subtract the bias, scale by alpha and beta, and then add the bias back?
            fc_out = fc_out * beta.dimshuffle(0, 'x')

            fc_out = fc_out * alpha.dimshuffle('x', 0)
            
            #self.W = W_full_precision
            self.W = Wr
        else:
            fc_out = super(DenseLayer, self).get_output_for(input, **kwargs)

        return fc_out
Example #36
    def testBackward():        
        #x_s = theano.shared(np.random.normal(0.0, 0.1, size=(1,patchSize*patchSize)).astype('float32'))
        x_s = theano.shared(np.zeros((1, patchSize*patchSize), dtype='float32'))
        y_s = theano.shared(np.ones((1,), dtype='int32'))
        c = classifier.validation_cost(y) + 0.01*T.sum(abs(x))
        loss = theano.clone(c, {x:x_s, y:y_s})                           
        upd = lasagne.updates.rmsprop(loss, [x_s], learning_rate=0.01)
        func = theano.function(inputs=[],
                               outputs = loss,
                               updates = upd)



        for i in range(10000):
            res = func()
            print("Loss: {0}".format(res))
            #if i%100 == 0:
            #    img = x_s.get_value(borrow=False)
            #    img = img.reshape((patchSize, patchSize))
            #    plt.imshow(img, cmap='gray')
            #    plt.show()            

        img = x_s.get_value(borrow=False)
        img = img.reshape((patchSize, patchSize))
        plt.imshow(img, cmap='gray')
        plt.show()            
Example #37
def corrupt(exprs, name, typ, pars):
    f_corrupt = lookup(typ, _corrupt)
    if 'true_loss' not in exprs:
        exprs['true_loss'] = exprs['loss']
    uncorrupted = exprs[name]
    corrupted = f_corrupt(uncorrupted, **pars)
    exprs['loss'] = theano.clone(exprs['loss'], {uncorrupted: corrupted})
Example #38
def test_gt_grad():
    """A user test that failed.

    Something about it made Elemwise.grad return something that was
    too complicated for get_scalar_constant_value to recognize as being 0, so
    gradient.grad reported that it was not a valid gradient of an
    integer.

    """
    floatX = config.floatX
    T = theano.tensor

    input_ = T.vector(dtype=floatX)
    random_values = numpy.random.RandomState(1234).uniform(
                                                low=-1, high=1, size=(2, 2))
    W_values = numpy.asarray(random_values, dtype=floatX)
    W = theano.shared(value=W_values, name='weights')
    correct_score = T.dot(input_, W)
    wrong_input = T.vector(dtype=floatX)
    wrong_score = theano.clone(correct_score, {input_: wrong_input})
    # Hinge loss

    scores = T.ones_like(correct_score) - correct_score + wrong_score
    cost = (scores * (scores > 0)).sum()
    T.grad(cost, input_)
Example #39
 def test_cloning_available(self):
     gop = generator(integers())
     res = gop ** 2
     shared = theano.shared(floatX(10))
     res1 = theano.clone(res, {gop: shared})
     f = theano.function([], res1)
     assert f() == np.float32(100)
Example #40
def cpu_to_gpu_graph(inputs, outputs):
    """ Converts a cpu-only subgraph into a gpu-only subgraph

    >>> x, y = theano.tensor.matrix('x'), theano.tensor.matrix('y')
    >>> z = theano.tensor.dot(x, y)
    >>> gpu_inputs, gpu_outputs = cpu_to_gpu_graph((x,y), (z,))
    >>> f = theano.function(gpu_inputs, gpu_outputs)
    >>> theano.printing.debugprint(f)
    GpuDot22 [@A] ''   0
     |gpu_x [@B]
     |gpu_y [@C]
    """

    math_opt = theano.compile.optdb.query('-inplace', '+fast_run', '-gpu')
    gpu_opt  = cuda.opt.gpu_optimizer.query('+gpu', '-inplace', '-async')
    gpu_comm = cuda.opt.gpu_cut_copies.query('+gpu')

    gpu_inputs, cpu_inputs = zip(*map(cpu_to_gpu_var, inputs))
    outputs2 = theano.clone(outputs, replace=dict(zip(inputs, cpu_inputs)))
    gpu_outputs = map(theano.sandbox.cuda.basic_ops.gpu_from_host, outputs2)

    fgraph = theano.FunctionGraph(gpu_inputs, gpu_outputs)
    math_opt.optimize(fgraph)
    gpu_opt.optimize(fgraph)
    gpu_comm.optimize(fgraph)
    fgraph.disown()

    for go, co in zip(gpu_outputs, outputs):
        go.name = gpu_name(co.name)

    return tuple(gpu_inputs), tuple(gpu_outputs)
Example #41
def prior_dlogp(vars, model, flat_view):
    """Returns the gradient of the prior on the parameters as a vector of size D x 1"""
    terms = tt.concatenate(
        [theano.grad(var.logpt, var).flatten() for var in vars], axis=0)
    dlogp = theano.clone(terms, flat_view.replacements, strict=False)

    return dlogp
Example #42
 def test_cloning_available(self):
     gop = pm.Minibatch(np.arange(100), 1)
     res = gop ** 2
     shared = theano.shared(np.array([10]))
     res1 = theano.clone(res, {gop: shared})
     f = theano.function([], res1)
     assert f() == np.array([100])
    def get_output(self, input, **kwargs):
        input_mean = input.mean(self.axes)
        input_invstd = T.inv(T.sqrt(input.var(self.axes) + self.epsilon))

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = self.deterministic
        if use_averages:
            mean = self.mean
            invstd = self.invstd
        else:
            mean = input_mean
            invstd = input_invstd

        # Decide whether to update the stored averages
        update_averages = self.update_averages and not use_averages
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_invstd = theano.clone(self.invstd, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_invstd.default_update = (
                (1 - self.alpha) * running_invstd + self.alpha * input_invstd)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            invstd += 0 * running_invstd

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(list(range(input.ndim - len(self.axes))))
        pattern = [
            'x' if input_axis in self.axes else next(param_axes)
            for input_axis in range(input.ndim)
        ]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        invstd = invstd.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * invstd) + beta
        return self.activation(normalized)
Example #44
    def __init__(self, components):
        """Constructor.

        Parameters
        ----------
        * `components` [list of `DistributionMixin`]:
            The components to join together.
        """
        super(Join, self).__init__()
        self.components = components

        for i, component in enumerate(components):
            # Add component parameters, constants and observeds
            if isinstance(component, TheanoDistribution):
                for p_i in component.parameters_:
                    self.parameters_.add(p_i)
                for c_i in component.constants_:
                    self.constants_.add(c_i)
                for o_i in component.observeds_:
                    self.observeds_.add(o_i)

        # Derive and overide pdf and nll analytically if possible
        if all([hasattr(c, "pdf_") for c in self.components]):
            # pdf
            c0 = self.components[0]
            self.pdf_ = theano.clone(c0.pdf_, {c0.X: self.X[:, 0:c0.ndim]})
            start = c0.ndim

            for c in self.components[1:]:
                self.pdf_ *= theano.clone(
                    c.pdf_, {c.X: self.X[:, start:start+c.ndim]})
                start += c.ndim

            self._make(self.pdf_, "pdf")

        if all([hasattr(c, "nll_") for c in self.components]):
            # nll
            c0 = self.components[0]
            self.nll_ = theano.clone(c0.nll_, {c0.X: self.X[:, 0:c0.ndim]})
            start = c0.ndim

            for c in self.components[1:]:
                self.nll_ += theano.clone(
                    c.nll_, {c.X: self.X[:, start:start+c.ndim]})
                start += c.ndim

            self._make(self.nll_, "nll")
Example #45
    def gradIminibatch_srng(self,
                            x,
                            srng,
                            num_samples,
                            model_type='iwae',
                            backward_pass='******'):
        # rep_x = T.extra_ops.repeat(x, num_samples, axis=0)
        rep_x = t_repeat(
            x, num_samples,
            axis=0)  # works marginally faster than theano's T.extra_ops.repeat
        q_samples = self.q_samplesIx_srng(rep_x, srng)

        log_ws = self.log_weightsIq_samples(q_samples)

        log_ws_matrix = log_ws.reshape((x.shape[0], num_samples))

        # for alpha divergence (take 0 <= alpha <= 1)
        # see the math to show why we can directly set alpha = 1,
        # with reparameterization trick
        if backward_pass == 'full':
            log_ws_matrix *= (1.0 - self.alpha)

            log_ws_minus_max = log_ws_matrix - T.max(
                log_ws_matrix, axis=1, keepdims=True)
            ws = T.exp(log_ws_minus_max)
            ws_normalized = ws / T.sum(ws, axis=1, keepdims=True)
            ws_normalized_vector = T.reshape(ws_normalized, log_ws.shape)

            dummy_vec = T.vector(dtype=theano.config.floatX)

        else:
            # just take the particle that has the largest (unnormalised) weight
            # NOTE: might pick different particles for different datapoint!
            log_ws_max = log_ws_matrix.max(axis=1)

        if backward_pass == 'max':
            print "Training an AAE with largest particle"
            return collections.OrderedDict([
                (param,
                 T.grad(T.sum(log_ws_max) / T.cast(1, log_ws.dtype), param))
                for param in self.params
            ])
        elif model_type in ['vae', 'VAE']:
            print "Training a VAE"
            return collections.OrderedDict([
                (param,
                 T.grad(
                     T.sum(log_ws) / T.cast(num_samples, log_ws.dtype), param))
                for param in self.params
            ])
        else:
            print "Training an AAE with alpha = %.2f, k = %d" % (self.alpha,
                                                                 num_samples)
            return collections.OrderedDict([
                (param,
                 theano.clone(T.grad(T.dot(log_ws, dummy_vec), param),
                              replace={dummy_vec: ws_normalized_vector}))
                for param in self.params
            ])
Example #46
    def inner_replacer(graph):
        new_graph = replacer(graph)

        other_inputs = []
        constants = []
        for input_ in gof.graph.inputs([new_graph]):
            if isinstance(input_, gof.Variable):
                if isinstance(input_, gof.Constant):
                    constants.append(input_)
                else:
                    other_inputs.append(input_)

        # foreign inputs are fgraph inputs and shared variables that we need
        # to access through inner inputs
        foreign_inputs = list(set(other_inputs) - set(outer_to_inner.values()))

        # skip further processing if there is nothing to do
        if not constants and not foreign_inputs:
            return new_graph

        replacements = []

        # constants just need to be replaced by copies that the inner
        # `fg` can take ownership of
        for input_ in constants:
            new_input = input_.clone()
            new_input.name = f"{new_input.name}_copied"
            replacements.append((input_, new_input))

        for outer_input in foreign_inputs:
            if getattr(outer_input, "update", False):
                # when theano.scan() constructs a scan node, it detects
                # shared variables with updates and returns these updates
                # to the user.  we need to do the same thing for every new
                # use of such a variable that is introduced.  it's hard to
                # do that at this point.
                # shared variables with updates inside the inner graph of
                # OpFromGraph are not supported at all, so we don't support
                # introducing those either.
                raise NotImplementedError(
                    f"Replacement introduces shared variable {outer_input} "
                    "which has an update associated with it into "
                    f"the inner graph of {containing_op}. This is not currently "
                    "supported.")
            # if this foreign input is not already available
            # as an inner input, connect it through a new
            # inner input
            if outer_input not in outer_to_inner.keys():
                inner_input = utils.safe_new(outer_input, tag="_copy")
                outer_to_inner[outer_input] = inner_input
                extra_inner_inputs.append(inner_input)
                extra_outer_inputs.append(outer_input)

        replacements.extend(outer_to_inner.items())

        (new_graph, ) = theano.clone([new_graph],
                                     share_inputs=True,
                                     replace=replacements)
        return new_graph
Example #47
 def normalizing_constant(self):
     t = self.to_flat_input(
         tt.max([v.scaling for v in self.model.basic_RVs]))
     t = theano.clone(t, {self.input: tt.zeros(self.total_size)})
     # if not scale_cost_to_minibatch: t=1
     t = tt.switch(self.scale_cost_to_minibatch, t,
                   tt.constant(1, dtype=t.dtype))
     return t
Example #48
    def set_size_and_deterministic(self, node, s, d):
        """
        Replaces self.symbolic_n_samples and self._deterministic_flag
        with non symbolic input. Used whenever user specifies
        `sample size` and `deterministic` option
        """
        initial_local = self._initial_part_matrix('local', s, d)
        initial_global = self._initial_part_matrix('global', s, d)

        # optimizations
        if isinstance(s, int) and (s == 1) or s is None:
            node = theano.clone(node, {self.logp: self.single_symbolic_logp})
        return theano.clone(
            node, {
                self.symbolic_initial_local_matrix: initial_local,
                self.symbolic_initial_global_matrix: initial_global,
            })
 def symbsample_X(self, Y=None, X=None):
     """
     TODO: Write docstring
     """
     if Y is None: Y = self.Y
     if X is None: X = self.lat_ev_model.get_X()
     Xgen = self.lat_ev_model.get_X()
     Nsamps, Tbins = Y.shape[0], Y.shape[1]
     
     TheChol = theano.clone(self.TheChol, replace={self.Y : Y, Xgen : X})
     postX = theano.clone(self.postX, replace={self.Y : Y, Xgen : X})
     normSamps = srng.normal([Nsamps, Tbins, self.xDim])
     
     noise, _ = theano.scan(lambda tc1, tc2, ns : 
                            blk_chol_inv(tc1, tc2, ns, lower=False, transpose=True), 
                            sequences=(TheChol[0], TheChol[1], normSamps))
     return postX + noise
Example #50
 def forward_pass(self, z0):
     ret = theano.clone(self.forward, {self.root.z0: z0})
     try:
         ret.tag.test_value = np.random.normal(
             size=z0.tag.test_value.shape).astype(self.z0.dtype)
     except AttributeError:
         ret.tag.test_value = self.root.z0.tag.test_value
     return ret
Example #51
    def make_L_fn(self, loss, params):
        grads = theano.grad(loss, params)

        params_next = [x - 1. / self.L * g for x, g in zip(params, grads)]
        loss_next = theano.clone(loss, replace=zip(params, params_next))
        sq_sum = sum((g**2).sum() for g in grads)

        return theano.function([self.input_var, self.target_var], [loss_next, sq_sum])
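The two quantities returned by the compiled function above (the loss at the gradient-step point and the squared gradient norm) are what a backtracking estimate of a Lipschitz constant L typically compares; presumably the caller keeps increasing L by the chosen factor until the standard sufficient-decrease condition holds (my reading, not stated in the code):

\[
f\!\left(\theta - \tfrac{1}{L}\nabla f(\theta)\right) \le f(\theta) - \frac{1}{2L}\,\lVert \nabla f(\theta) \rVert^{2}.
\]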
Example #52
 def logp_norm(self, z):
     t = self.approx.normalizing_constant
     factors = ([tt.sum(var.logpt) / t for var in self.model.basic_RVs] +
                [tt.sum(var) / t for var in self.model.potentials])
     logpt = tt.add(*factors)
     p = self.approx.to_flat_input(logpt)
     p = theano.clone(p, {self.input: z})
     return p
Example #53
    def __call__(self, input):
        """ Replaces the single input of symbolic variable to be the passed argument.

        Parameters
        ----------
        input : TensorVariable
        """
        oldinput, = inputvars(self.tensor)
        return theano.clone(self.tensor, {oldinput: input}, strict=False)
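A minimal sketch of what `strict=False` buys here (hypothetical variables, not the pymc3 code): the replacement may carry a different Type, and the downstream nodes are rebuilt to match.

import numpy as np
import theano
import theano.tensor as tt

oldinput = tt.fvector('oldinput')           # float32 vector
expr = (2 * oldinput).sum()

newinput = tt.dvector('newinput')           # float64: a different Type
out = theano.clone(expr, {oldinput: newinput}, strict=False)

f = theano.function([newinput], out)
print(f(np.arange(3, dtype='float64')))     # 6.0, now computed in float64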
Example #54
    def check_rop_lop(self, y, out_shape):
        """
        As check_mat_rop_lop, except the input is self.x which is a
        vector. The output is still a vector.
        """
        # TEST ROP
        vx = np.asarray(self.rng.uniform(size=self.in_shape),
                        theano.config.floatX)
        vv = np.asarray(self.rng.uniform(size=self.in_shape),
                        theano.config.floatX)

        yv = tensor.Rop(y, self.x, self.v)
        rop_f = function([self.x, self.v], yv, on_unused_input="ignore")
        J, _ = theano.scan(
            lambda i, y, x: tensor.grad(y[i], x),
            sequences=tensor.arange(y.shape[0]),
            non_sequences=[y, self.x],
        )
        sy = tensor.dot(J, self.v)

        scan_f = function([self.x, self.v], sy, on_unused_input="ignore")

        v1 = rop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert np.allclose(v1, v2), "ROP mismatch: %s %s" % (v1, v2)

        known_fail = False
        try:
            tensor.Rop(theano.clone(y, replace={self.x: break_op(self.x)}),
                       self.x, self.v)
        except ValueError:
            known_fail = True

        # TEST LOP

        vx = np.asarray(self.rng.uniform(size=self.in_shape),
                        theano.config.floatX)
        vv = np.asarray(self.rng.uniform(size=out_shape), theano.config.floatX)

        yv = tensor.Lop(y, self.x, self.v)
        lop_f = function([self.x, self.v], yv, on_unused_input="ignore")
        J, _ = theano.scan(
            lambda i, y, x: tensor.grad(y[i], x),
            sequences=tensor.arange(y.shape[0]),
            non_sequences=[y, self.x],
        )
        sy = tensor.dot(self.v, J)

        scan_f = function([self.x, self.v], sy)

        v1 = lop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert np.allclose(v1, v2), "LOP mismatch: %s %s" % (v1, v2)

        if known_fail:
            pytest.skip("Rop does not handle non-differentiable inputs "
                        "correctly. Bug exposed by fixing Add.grad method.")
Example #55
 def test_gen_cloning_with_shape_change(self, datagen):
     gen = generator(datagen)
     gen_r = tt_rng().normal(size=gen.shape).T
     X = gen.dot(gen_r)
     res, _ = theano.scan(lambda x: x.sum(), X, n_steps=X.shape[0])
     assert res.eval().shape == (50,)
     shared = theano.shared(datagen.data.astype(gen.dtype))
     res2 = theano.clone(res, {gen: shared**2})
     assert res2.eval().shape == (1000,)
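The same effect can be seen without pymc3's `generator`; a hypothetical sketch in which one scan graph is cloned against inputs of two different shapes:

import numpy as np
import theano
import theano.tensor as tt

X = tt.matrix('X')
res, _ = theano.scan(lambda row: row.sum(), sequences=X)

small = theano.shared(np.ones((5, 3), dtype=theano.config.floatX))
big = theano.shared(np.ones((20, 3), dtype=theano.config.floatX))

r_small = theano.clone(res, {X: small})
r_big = theano.clone(res, {X: big})
print(r_small.eval().shape, r_big.eval().shape)   # (5,) (20,)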
Example #56
 def compute_Entropy(self, Y=None, X=None):
     if Y is None: Y = self.Y
     if X is None: X = self.X
     Xgen = self.lat_ev_model.get_X()
     Nsamps, Tbins = Y.shape[0], Y.shape[1]
     
     LnDeterminant = theano.clone(self.LnDeterminant, replace={self.Y : Y, Xgen : X})
     # Yuanjun has xDim here, so I kept it, but I don't think this is right.
     Entropy = 0.5*LnDeterminant + 0.5*Nsamps*Tbins*(1 + np.log(2*np.pi))*self.xDim
     return Entropy
Example #57
 def logp_norm(self):
     sized_symbolic_logp = self.approx.sized_symbolic_logp
     if self.use_histogram:
         sized_symbolic_logp = theano.clone(
             sized_symbolic_logp,
             dict(
                 zip(self.approx.symbolic_randoms,
                     self.approx.collect('histogram'))))
     return sized_symbolic_logp / self.approx.symbolic_normalizing_constant
Example #58
def _batch_normalization(input_variable,
                         name,
                         mode_switch,
                         alpha=0.5,
                         strict=True):
    """Based on batch normalization by Jan Schluter for Lasagne"""
    raise ValueError("NYI")
    G_name = name + '_G'
    B_name = name + '_B'
    list_of_names = [G_name, B_name]
    if not names_in_graph(list_of_names, graph):
        input_dim = calc_expected_dims(graph, input_variable)[-1]
        np_G = np_ones((input_dim, ))
        np_B = np_zeros((input_dim, ))
        add_arrays_to_graph([np_G, np_B], list_of_names, graph, strict=strict)
    else:
        if strict:
            raise AttributeError(
                "Name %s already found in graph with strict mode!" % name)
    G, B = fetch_from_graph(list_of_names, graph)
    eps = 1E-20
    batch_mean = input_variable.mean(axis=0, keepdims=True)
    batch_std = input_variable.std(axis=0, keepdims=True)
    running_mean_shape = calc_expected_dims(graph, batch_mean)
    running_std_shape = calc_expected_dims(graph, batch_std)
    running_mean = theano.clone(batch_mean, share_inputs=True)
    running_std = theano.clone(batch_std, share_inputs=True)
    running_mean, running_std = add_random_to_graph(
        [running_mean, running_std], [running_mean_shape, running_std_shape],
        [name + '_running_mean', name + '_running_std'], graph)
    running_mean.default_update = ((1 - alpha) * running_mean +
                                   alpha * batch_mean)
    running_std.default_update = ((1 - alpha) * running_std +
                                  alpha * batch_std)
    running_mean = tensor.addbroadcast(running_mean, 0)
    running_std = tensor.addbroadcast(running_std, 0)
    # include running_{mean, std} in the computation graph so their default
    # updates are applied (the 0 * terms themselves are optimized away)
    batch_mean += 0 * running_mean
    batch_std += 0 * running_std
    fixed = (input_variable - running_mean) / (running_std + eps)
    batch = (input_variable - batch_mean) / (batch_std + eps)
    normed = (1 - mode_switch) * batch + mode_switch * fixed
    out = G * normed + B
    return out
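The `+= 0 * running_mean` lines above are the usual trick for pulling a variable with a `default_update` into the graph, so the update fires as a side effect of any function that uses the output. A minimal, hypothetical sketch of just that mechanism, with a call counter instead of running statistics:

import numpy as np
import theano
import theano.tensor as tt

calls = theano.shared(np.asarray(0.0, dtype=theano.config.floatX), name='calls')
calls.default_update = calls + 1.0          # applied whenever `calls` is in a graph

x = tt.vector('x')
out = x.sum() + 0 * calls                   # pull `calls` into the graph; the 0 * term
                                            # itself is optimized away

f = theano.function([x], out)
f(np.ones(2, dtype=theano.config.floatX))
f(np.ones(2, dtype=theano.config.floatX))
print(calls.get_value())                    # 2.0: the counter advanced once per call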
Example #59
    def check_rop_lop(self, y, out_shape):
        """
        As check_mat_rop_lop, except the input is self.x which is a
        vector. The output is still a vector.

        """
        # TEST ROP
        vx = numpy.asarray(self.rng.uniform(size=self.in_shape),
                           theano.config.floatX)
        vv = numpy.asarray(self.rng.uniform(size=self.in_shape),
                           theano.config.floatX)

        yv = tensor.Rop(y, self.x, self.v)
        rop_f = function([self.x, self.v], yv, on_unused_input='ignore')
        J, _ = theano.scan(lambda i, y, x: tensor.grad(y[i], x),
                           sequences=tensor.arange(y.shape[0]),
                           non_sequences=[y, self.x])
        sy = tensor.dot(J, self.v)

        scan_f = function([self.x, self.v], sy, on_unused_input='ignore')

        v1 = rop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert numpy.allclose(v1, v2), ('ROP mismatch: %s %s' % (v1, v2))
        known_fail = False
        try:
            self.check_nondiff_rop(
                theano.clone(y, replace={self.x: break_op(self.x)}))
        except AssertionError:
            known_fail = True

        # TEST LOP

        vx = numpy.asarray(self.rng.uniform(size=self.in_shape),
                           theano.config.floatX)
        vv = numpy.asarray(self.rng.uniform(size=out_shape),
                           theano.config.floatX)

        yv = tensor.Lop(y, self.x, self.v)
        lop_f = function([self.x, self.v], yv, on_unused_input='ignore')
        J, _ = theano.scan(lambda i, y, x: tensor.grad(y[i], x),
                           sequences=tensor.arange(y.shape[0]),
                           non_sequences=[y, self.x])
        sy = tensor.dot(self.v, J)

        scan_f = function([self.x, self.v], sy)

        v1 = lop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert numpy.allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))

        if known_fail:
            raise KnownFailureTest(
                "Rop doesn't handle non-differentiable "
                "inputs correctly. Bug exposed by fixing Add.grad"
                " method.")
    def get_output_for(self, input, deterministic=False, **kwargs):
        beta = self.beta
        if not deterministic:
            # running percentile of the input, updated via the default_update trick
            self_beta = theano.clone(self.beta, share_inputs=False)
            input_beta = ttt.percentile(input, self.perc)
            self_beta.default_update = ((1 - self.alpha) * self_beta +
                                        self.alpha * input_beta)
            beta += 0 * self_beta

        # soft thresholding
        return theano.tensor.nnet.sigmoid(self.tight * (input - beta + self.bias))