Example #1
 def sample():
     vis = initial_vis
     for i in xrange(n_steps):
         hid = self.propup(vis)
         vis = self.propdown(hid)
     add_update(initial_vis, vis)
     return vis, hid
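
The example above registers the final visible state as a deferred update to the shared variable initial_vis, so each call continues the Gibbs chain where the previous one left off. Below is a self-contained pure-Theano sketch of the same persistent-chain pattern; the weights and the propup/propdown definitions here are illustrative stand-ins (not the original class's methods), and the explicit updates= argument to theano.function plays the role that add_update presumably defers until compilation.

import numpy as np
import theano
import theano.tensor as tt
from theano.tensor.shared_randomstreams import RandomStreams

rng = RandomStreams(seed=0)
n_vis, n_hid, n_steps = 6, 4, 5
w = theano.shared(np.random.randn(n_vis, n_hid).astype(theano.config.floatX), name='w')
initial_vis = theano.shared(np.zeros((1, n_vis), dtype=theano.config.floatX), name='v0')

def propup(v):
    p = tt.nnet.sigmoid(v.dot(w))
    return rng.binomial(size=p.shape, n=1, p=p, dtype=theano.config.floatX)

def propdown(h):
    p = tt.nnet.sigmoid(h.dot(w.T))
    return rng.binomial(size=p.shape, n=1, p=p, dtype=theano.config.floatX)

vis = initial_vis
for _ in range(n_steps):
    hid = propup(vis)
    vis = propdown(hid)

# The explicit update below corresponds to add_update(initial_vis, vis).
sample = theano.function([], [vis, hid], updates=[(initial_vis, vis)])
visible_sample, hidden_sample = sample()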
Example #2
        def cd_function(*input_signals):

            wake_visible = input_signals if input_layers is None else up_path(*input_signals)
            wake_hidden = propup(*wake_visible)

            initial_hidden =[theano.shared(np.zeros(wh.tag.test_value.shape, dtype = theano.config.floatX), name = 'persistent_hidden_state') for wh in wake_hidden] \
                if persistent else wake_hidden

            gibbs_path = [(hidden_layers, visible_layers)] + [(visible_layers, hidden_layers), (hidden_layers, visible_layers)] * (n_gibbs-1)
            sleep_visible = self.get_inference_function(hidden_layers, visible_layers, gibbs_path)(*initial_hidden)
            sleep_hidden = propup(*sleep_visible)

            all_params = sum([x.parameters for x in ([self.layers[i] for i in visible_layers]
                +[self.layers[i] for i in hidden_layers]+[self.bridges[i, j] for i in visible_layers for j in hidden_layers])], [])

            if method == 'free_energy':
                cost = free_energy(*wake_visible).mean() - free_energy(*sleep_visible).mean()
            elif method == 'energy':
                cost = tt.mean(wake_visible.T.dot(wake_hidden) - sleep_visible.T.dot(sleep_hidden))
            else:
                bad_value(method)

            optimizer(cost = cost, parameters = all_params, constants = wake_visible+sleep_visible)

            if persistent:
                for p, s in zip(initial_hidden, sleep_hidden):
                    add_update(p, s)
Example #3
def future_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    rx = kd_x/as_floatx(kp_x+kd_x)
    re = kd_e/as_floatx(kp_e+kd_e)
    scale = (1./as_floatx(kp_x*kp_e + kp_x*kd_e + kd_x*kp_e))
    n_samples, n_in, n_out = shapes
    x_past_var = create_shared_variable(np.zeros((n_samples, n_in)))
    e_past_var = create_shared_variable(np.zeros((n_samples, n_out)))
    x_past = x_past_var*rx
    e_past = e_past_var*re
    w_grad = scale * (xs.T.dot(e_past+es) + x_past.T.dot(es))
    add_update(x_past_var, x_past + xs)
    add_update(e_past_var, e_past + es)
    return w_grad
Example #4
def running_average(data):
    n_points = theano.shared(np.array(1).astype(int))
    avg = theano.shared(np.zeros_like(data.tag.test_value).astype(theano.config.floatX))
    new_avg = data*(1./n_points) + avg*(n_points-1.)/n_points
    add_update(avg, new_avg)
    add_update(n_points, n_points+1)
    return new_avg
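
For comparison, here is a minimal plain-Theano sketch of the same running average (all names are illustrative); the two add_update calls above correspond to the entries of the updates= list passed to theano.function.

import numpy as np
import theano
import theano.tensor as tt

data = tt.vector('data')
n_points = theano.shared(np.array(1., dtype=theano.config.floatX), name='n_points')
avg = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='avg')

new_avg = tt.cast(data / n_points + avg * (n_points - 1.) / n_points, theano.config.floatX)
step = theano.function([data], new_avg,
                       updates=[(avg, new_avg), (n_points, n_points + 1.)],
                       allow_input_downcast=True)

for batch in np.random.randn(5, 3):
    print(step(batch))  # the running mean of all batches seen so far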
Example #5
def future_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    rx = kd_x / as_floatx(kp_x + kd_x)
    re = kd_e / as_floatx(kp_e + kd_e)
    scale = (1. / as_floatx(kp_x * kp_e + kp_x * kd_e + kd_x * kp_e))
    n_samples, n_in, n_out = shapes
    x_past_var = create_shared_variable(np.zeros((n_samples, n_in)))
    e_past_var = create_shared_variable(np.zeros((n_samples, n_out)))
    x_past = x_past_var * rx
    e_past = e_past_var * re
    w_grad = scale * (xs.T.dot(e_past + es) + x_past.T.dot(es))
    add_update(x_past_var, x_past + xs)
    add_update(e_past_var, e_past + es)
    return w_grad
Example #6
        def train(wake_visible):

            wake_hidden = propup(wake_visible)

            persistent_state = sleep_hidden = theano.shared(
                np.zeros(wake_hidden.tag.test_value.shape,
                         dtype=theano.config.floatX),
                name='persistent_hidden_state') if persistent else wake_hidden

            for _ in xrange(n_gibbs):
                sleep_visible = propdown(sleep_hidden)
                sleep_hidden = propup(sleep_visible)

            wake_energy = bridge.free_energy(
                wake_visible) + hidden_layer.free_energy(bridge(wake_visible))
            sleep_energy = bridge.free_energy(
                sleep_visible) + hidden_layer.free_energy(
                    bridge(sleep_visible))
            cost = tt.mean(wake_energy - sleep_energy)

            params = visible_layer.parameters + bridge.parameters + hidden_layer.parameters
            optimizer(cost=cost,
                      parameters=params,
                      constants=[wake_visible, sleep_visible])

            if persistent:
                add_update(persistent_state, sleep_hidden)
Example #7
 def __call__(self, x):
     # x should have a minibatch (first) dimension of size 1
     assert x.ishape[0]==1, "This method only works for minibatches of size 1, but you used a minibatch of size: %s" % (x.tag.test_value.shape[0])
     running_mean = create_shared_variable(np.zeros(x.tag.test_value.shape[1:]))
     new_running_mean = running_mean * self.decay_constant + x[0] * (1-self.decay_constant).astype(theano.config.floatX)
     add_update(running_mean, new_running_mean)
     return x - running_mean
Example #8
def running_average(data):
    n_points = theano.shared(np.array(1).astype(int))
    avg = theano.shared(
        np.zeros_like(data.tag.test_value).astype(theano.config.floatX))
    new_avg = data * (1. / n_points) + avg * (n_points - 1.) / n_points
    add_update(avg, new_avg)
    add_update(n_points, n_points + 1)
    return new_avg
Example #9
 def __call__(self):
     (vector_ixs, ), updates = self._get_vector_indices_and_updates()
     full_indices = \
         (vector_ixs, ) if isinstance(self._size, int) else \
         ind2sub(vector_ixs, self._size)
     for var, val in updates:
         add_update(var, val)
     return full_indices
Example #10
def herd(x, shape=None):
    phi = shared_like(x,
                      name='phi') if shape is None else create_shared_variable(
                          np.zeros(shape), name='phi{}'.format(shape))
    phi_ = phi + x
    s = tt.round(phi_)
    add_update(phi, phi_ - s)
    return s
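
herd above is a one-step herding quantizer: it accumulates the input into a residual phi, emits the rounded value, and keeps the rounding error for the next call, so the emitted integers average out to the real-valued input over time. A small NumPy illustration of the same idea (the names here are illustrative, not part of the original API):

import numpy as np

def herd_numpy(x_stream):
    phi = np.zeros_like(x_stream[0])
    for x in x_stream:
        phi = phi + x       # accumulate the residual
        s = np.round(phi)   # emit the quantized value
        phi = phi - s       # carry the rounding error forward
        yield s

xs = [np.array([0.3, 0.7])] * 10
spikes = np.array(list(herd_numpy(xs)))
print(spikes.mean(axis=0))  # approximately [0.3, 0.7]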
Example #11
 def decode(self, y, shape=None):
     xp = shared_like(
         y, name='xp') if shape is None else create_shared_variable(
             np.zeros(shape), name='xp{}'.format(shape))
     div = (self.kp + self.kd)
     x = (y + self.kd * xp) / div
     add_update(xp, x)
     return x
Example #12
 def __call__(self):
     (vector_ixs, ), updates = self._get_vector_indices_and_updates()
     full_indices = \
         (vector_ixs, ) if isinstance(self._size, int) else \
         ind2sub(vector_ixs, self._size)
     for var, val in updates:
         add_update(var, val)
     return full_indices
Example #13
 def _update_param(self, param, gradient):
     mean_squared_grad = theano.shared(np.zeros_like(param.get_value()))
     new_mean_squared_grad = self.decay * mean_squared_grad + (
         1 - self.decay) * gradient**2
     delta_p = -self.learning_rate * gradient / tt.maximum(
         tt.sqrt(new_mean_squared_grad), self.epsilon)
     add_update(param, param + delta_p)
     add_update(mean_squared_grad, new_mean_squared_grad)
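
Example #13 is an RMSProp-style rule: it tracks an exponential moving average of the squared gradient and divides the step by its root. The same arithmetic written out in NumPy, with decay, learning_rate and epsilon standing in for the corresponding attributes (illustrative names, not the original API):

import numpy as np

def rmsprop_step(param, grad, mean_sq_grad, learning_rate=1e-3, decay=0.9, epsilon=1e-6):
    mean_sq_grad = decay * mean_sq_grad + (1 - decay) * grad ** 2
    param = param - learning_rate * grad / np.maximum(np.sqrt(mean_sq_grad), epsilon)
    return param, mean_sq_grad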
Example #14
 def free_sample():
     (visible_state, hidden_state), _ = get_bounce_fcn(
         start_from=start_from,
         n_steps=n_steps,
         return_smooth_visible=return_smooth_visible)(persistent_state)
     add_update(
         persistent_state,
         visible_state if start_from == 'visible' else hidden_state)
     return visible_state, hidden_state
Example #15
 def __call__(self, inputs):
     if self.scale != 1:
         import theano
         inputs = inputs * np.array(self.scale, dtype=theano.config.floatX)
     inc_phi = self.phi + inputs
     spikes = tt.round(inc_phi)
     new_phi = inc_phi-spikes
     add_update(self.phi, new_phi)
     return spikes
Example #16
 def __call__(self, x):
     """
     :param x: A (n_samples, n_input_maps, size_y, size_x) image/feature tensor
     :return: A (n_samples, n_output_maps, size_y-w_size_y+1, size_x-w_size_x+1) tensor
     """
     result = tt.nnet.conv2d(input=x, filters=self.w, border_mode=self.border_mode, filter_flip=self.filter_flip) + self.bias_switch*(self.b[:, None, None] if self.b is not False else 0)
     if self.b is not False:
         add_update(self.bias_switch, 0)
     return result
Example #17
    def _update_param(self, param, gradient):

        if self.momentum != 0:
            mom = theano.shared(np.zeros_like(param.get_value()))
            new_mom = self.momentum * mom + gradient
            add_update(mom, new_mom)
            direction = new_mom  # Or mom, something about Nesterov...
        else:
            direction = gradient
        add_update(param, param - self.eta*direction - self.decay*param)
Example #18
    def _update_param(self, param, gradient):

        if self.momentum != 0:
            mom = theano.shared(np.zeros_like(param.get_value()))
            new_mom = self.momentum * mom + gradient
            add_update(mom, new_mom)
            direction = new_mom  # Or mom, something about Nesterov...
        else:
            direction = gradient
        add_update(param, param - self.eta * direction - self.decay * param)
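
Examples #17 and #18 are the same SGD-with-momentum rule plus a simple weight-decay term. A NumPy sketch of the step, with eta, momentum and decay standing in for the corresponding attributes (illustrative names):

import numpy as np

def momentum_sgd_step(param, grad, mom, eta=0.01, momentum=0.9, decay=0.0):
    mom = momentum * mom + grad                     # heavy-ball momentum
    param = param - eta * mom - decay * param       # descend plus weight decay
    return param, mom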
Example #19
 def train(x, y):
     w_0 = tt.set_subtensor(w[alpha], 0)  # (n_dim_in, n_dim_out)
     w_1 = tt.set_subtensor(w[alpha], 1)  # (n_dim_in, n_dim_out)
     z_0 = tt.nnet.sigmoid(x.dot(w_0))  # (n_samples, n_dim_out)
     z_1 = tt.nnet.sigmoid(x.dot(w_1))  # (n_samples, n_dim_out)
     log_likelihood_ratio = tt.sum(tt.log(bernoulli(y, z_1))-tt.log(bernoulli(y, z_0)), axis = 0)  # (n_dim_out, )
     p_wa = tt.nnet.sigmoid(log_likelihood_ratio)  # (n_dim_out, )
     w_sample = rng.binomial(p=p_wa)  # (n_dim_out, )
     w_new = tt.set_subtensor(w[alpha], w_sample)  # (n_dim_in, n_dim_out)
     add_update(w, w_new)
     add_update(alpha, (alpha+1) % n_dim_in)
Example #20
 def train(self, x, y):
     p_wa = self.compute_p_wa(
         self._w, x, y, self._alpha,
         self._possible_ws)  # (n_alpha, n_dim_out, n_possible_ws)
     w_sample = sample_categorical(self._rng,
                                   p_wa,
                                   values=self._possible_ws)
     w_new = tt.set_subtensor(self._w[self._alpha],
                              w_sample)  # (n_dim_in, n_dim_out)
     add_update(self._w, w_new)
     self._add_alpha_update()
Example #21
    def train(self, x, y):
        p_wa = self.compute_p_wa(self._w, x, y, self._alpha, self._possible_ws)
        phi_alpha = self._phi[self._alpha] + p_wa  # (n_alpha, n_dim_out, n_possible_ws)

        k_chosen = tt.argmax(phi_alpha, axis = 2)  # (n_alpha, n_dim_out)
        selected_phi_indices = (tt.arange(self._alpha.shape[0])[:, None], tt.arange(y.shape[1])[None, :], k_chosen)
        new_phi_alpha = tt.set_subtensor(phi_alpha[selected_phi_indices], phi_alpha[selected_phi_indices]-1)  # (n_alpha, n_dim_out, n_possible_ws)
        w_sample = self._possible_ws[k_chosen]  # (n_alpha, n_dim_out)
        new_phi = tt.set_subtensor(self._phi[self._alpha], new_phi_alpha)  # (n_dim_in, n_dim_out, n_possible_ws)
        w_new = tt.set_subtensor(self._w[self._alpha], w_sample)  # (n_dim_in, n_dim_out)
        add_update(self._w, w_new)
        add_update(self._phi, new_phi)
        self._add_alpha_update()
Example #22
 def encode(self, x, shape=None):
     running_mag = create_shared_variable(1.)
     add_update(running_mag, (1 - self.adaptation_rate) * running_mag +
                self.adaptation_rate * abs(x).mean())
     target_k_beta = self.k_beta_init * running_mag
     add_update(
         self.k_beta,
         self.k_beta + self.adaptation_rate * (target_k_beta - self.k_beta))
     return pd_encode(x,
                      kp=self.kp,
                      kd=self.kd,
                      quantization=self.quantization,
                      shape=shape)
Example #23
        def train(wake_visible):

            wake_hidden = self.propup(wake_visible)
            persistent_state = sleep_hidden = create_shared_variable(np.zeros(wake_hidden.tag.test_value.shape),
                name = 'persistent_hidden_state') if persistent else wake_hidden
            for _ in xrange(n_gibbs):
                sleep_visible = self.propdown(sleep_hidden)
                sleep_hidden = self.propup(sleep_visible)
            wake_energy = self.energy(wake_visible)
            sleep_energy = self.energy(sleep_visible)
            cost = wake_energy - sleep_energy
            optimizer(cost = cost, parameters = self.parameters, constants = [wake_visible, sleep_visible])
            if persistent:
                add_update(persistent_state, sleep_hidden)
Example #24
 def encode(self, x, shape=None):
     if shape is None:
         xp = create_shared_variable(np.zeros((0, )*x.ndim), name='xp')
         delta = ifelse(xp.size>0, x-xp, x)
     else:
         xp = create_shared_variable(np.zeros(shape), name='xp{}'.format(shape))
         delta = x - xp
     add_update(xp, x)
     y = self.kp*x + self.kd*delta
     if self.quantization is None:
         return y
     elif self.quantization=='herd':
         return herd(y, shape=shape)
     else:
         raise Exception('No quantizer: {}'.format(self.quantization))
Example #25
 def encode(self, x, shape=None):
     if shape is None:
         xp = create_shared_variable(np.zeros((0, ) * x.ndim), name='xp')
         delta = ifelse(xp.size > 0, x - xp, x)
     else:
         xp = create_shared_variable(np.zeros(shape),
                                     name='xp{}'.format(shape))
         delta = x - xp
     add_update(xp, x)
     y = self.kp * x + self.kd * delta
     if self.quantization is None:
         return y
     elif self.quantization == 'herd':
         return herd(y, shape=shape)
     else:
         raise Exception('No quantizer: {}'.format(self.quantization))
Example #26
 def compute_grad(self, xc, ec, x_true = None, e_true = None):
     """
     :param xc:
     :param ec:
     :param x:
     :param e:
     :return:
     """
     x_past = self.x_past*self.r if x_true is None else x_true*(self.kp+self.kd)-xc
     e_past = self.e_past*self.r if e_true is None else e_true*(self.kp+self.kd)-ec
     w_grad = self.scale * (xc.T.dot(e_past+ec) + x_past.T.dot(ec))
     if x_true is None:
         add_update(self.x_past, x_past + xc)
     if e_true is None:
         add_update(self.e_past, e_past + ec)
     return w_grad
Example #27
    def forward_pass_and_state(self, x, count_ops = False):
        # s = quantize(x, mode = self.fwd_quantizer, shape = (self.minibatch_size, self.n_in))
        # s = pd_encode(x, kp=self.kp, kd=self.kd, quantization=self.fwd_quantizer, shape = (self.minibatch_size, self.n_in))
        s = self.encdec.encode(x, shape = (self.minibatch_size, self.n_in))

        # if self.n_in==784:
        #     tdbplot(s.reshape((28, 28)), 's')

        # pre_act = pd_decode(s.dot(self.w), kp=self.kp, kd=self.kd, shape= (self.minibatch_size, self.n_out)) + self.b
        pre_act = self.encdec.decode(s.dot(self.w), shape= (self.minibatch_size, self.n_out)) + self.b

        if count_ops:
            add_update(self.fwd_op_count, self.fwd_op_count+abs(s).sum().astype('int64')*self.n_out, accumulate=True)

        # pre_act = s.dot(self.w) + self.b
        out = compute_activation(pre_act, activation_name=self.nonlinearity)
        return out, (x, s, pre_act)
Example #28
 def multi_step(self, inputs, h_init=None, c_init=None, update_states=True):
     """
     Do a chain of steps and update the internal states
     inputs is a symbolic (n_frames, ...) array
     outputs is a symbolic (n_frames, ...) array
     """
     h_init, c_init = self.get_initial_state(h_init, c_init)
     all_states, updates = theano.scan(
         self.step,
         sequences=[inputs],
         outputs_info=[h_init, c_init],
     )
     h_sequence, c_sequence = all_states
     if update_states:
         add_update(h_init, h_sequence[-1])
         add_update(c_init, c_sequence[-1])
     return h_sequence
Example #29
        def generate(primer, n_steps):
            """
            Generate a sequence of outputs, and update the internal state.

            primer: A sequence to prime on.  This will overwrite the OUTPUT at
                each time step.  Note: this means the first iteration will run
                off the last output from the previous call to generate.
            n_steps: Number of steps (after the primer) to run.
            return: A sequence of length n_steps.
            """
            n_primer_steps = primer.shape[0]
            n_total_steps = n_primer_steps + n_steps

            def do_step(i, x_, h_, c_):
                """
                i: The step number (int)
                x_: An input vector
                h_: A hiddens state vector
                c_: A memory cell vector
                """
                y_prob, h, c = self.step(x_, h_, c_)
                y_candidate = ifelse(
                    int(stochastic),
                    rng.multinomial(n=1, pvals=y_prob[None, :])[0].astype(
                        theano.config.floatX), y_prob)
                # y_candidate = ifelse(int(stochastic), rng.multinomial(n=1, pvals=y_prob.dimshuffle('x', 1))[0].astype(theano.config.floatX), y_prob)
                y = ifelse(
                    i < n_primer_steps, primer[i], y_candidate
                )  # Note: If you get an error here, you just need to prime with something on the first call.
                return y, h, c

            (x_gen, h_gen, c_gen), updates = theano.scan(
                do_step,
                sequences=[tt.arange(n_total_steps)],
                outputs_info=[x_init, h_init, c_init],
            )

            if maintain_state:
                updates += [(x_init, x_gen[-1]), (h_init, h_gen[-1]),
                            (c_init, c_gen[-1])]

            for var, val in updates.items():
                add_update(var, val)

            return x_gen[n_primer_steps:],
Example #30
def past_weight_grad_step(xs, es, kp_x, kd_x, kp_e, kd_e, shape, dws=None):
    """
    Do an efficient update of the weights given the two spike-trains.

    (This still runs FING SLOWLY!)

    :param xs: An (n_in) vector
    :param es: An (n_out) vector
    :param kp_x:
    :param kd_x:
    :param kp_e:
    :param kd_e:
    :param shape: (n_in, n_out)
    :return:
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_in, n_out = shape
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)

    tx_last = create_shared_variable(np.zeros(n_in)+1)
    te_last = create_shared_variable(np.zeros(n_out)+1)
    x_last = create_shared_variable(np.zeros(n_in))
    e_last = create_shared_variable(np.zeros(n_out))
    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    x_spike_ixs, = tt.nonzero(x_spikes)
    e_spike_ixs, = tt.nonzero(e_spikes)

    if dws is None:
        dws = tt.zeros(shape)

    t_last = tt.minimum(tx_last[x_spike_ixs, None], te_last)  # (n_x_spikes, n_out)
    dws = tt.inc_subtensor(dws[x_spike_ixs, :], x_last[x_spike_ixs, None]*e_last
        * rx**(tx_last[x_spike_ixs, None]-t_last)
        * re**(te_last[None, :]-t_last)
        * geoseries_sum(re*rx, t_end=t_last, t_start=1)
        )

    new_x_last = tt.set_subtensor(x_last[x_spike_ixs], x_last[x_spike_ixs]*rx**tx_last[x_spike_ixs]+ xs[x_spike_ixs]/as_floatx(kd_x))
    new_tx_last = tt.switch(x_spikes, 0, tx_last)

    t_last = tt.minimum(new_tx_last[:, None], te_last[e_spike_ixs])  # (n_in, n_e_spikes)
    dws = tt.inc_subtensor(dws[:, e_spike_ixs], new_x_last[:, None]*e_last[e_spike_ixs]
        * rx**(new_tx_last[:, None]-t_last)
        * re**(te_last[None, e_spike_ixs]-t_last)
        * geoseries_sum(re*rx, t_end=t_last, t_start=1)
        )

    add_update(x_last, new_x_last)
    add_update(e_last, tt.set_subtensor(e_last[e_spike_ixs], e_last[e_spike_ixs]*re**te_last[e_spike_ixs]+ es[e_spike_ixs]/as_floatx(kd_e)))
    add_update(tx_last, new_tx_last+1)
    add_update(te_last, tt.switch(e_spikes, 1, te_last+1))
    return dws
Example #31
    def get_all_signals(self, input_):
        scale = self.get_scale()
        scaled_input = input_*scale

        inc_phi = self.phi + scaled_input
        epsilon = tt.round(inc_phi) - inc_phi
        spikes = inc_phi + epsilon
        # spikes = tt.round(inc_phi)
        new_phi = inc_phi-spikes

        output = spikes / scale
        signals = dict(
            input=input_,
            scaled_input=scaled_input,
            spikes=spikes,
            epsilon=epsilon,
            output=output,
            )
        add_update(self.phi, new_phi)
        return signals
Example #32
    def train(self, x, y):
        p_wa = self.compute_p_wa(self._w, x, y, self._alpha, self._possible_ws)
        phi_alpha = self._phi[
            self._alpha] + p_wa  # (n_alpha, n_dim_out, n_possible_ws)

        k_chosen = tt.argmax(phi_alpha, axis=2)  # (n_alpha, n_dim_out)
        selected_phi_indices = (tt.arange(self._alpha.shape[0])[:, None],
                                tt.arange(y.shape[1])[None, :], k_chosen)
        new_phi_alpha = tt.set_subtensor(
            phi_alpha[selected_phi_indices], phi_alpha[selected_phi_indices] -
            1)  # (n_alpha, n_dim_out, n_possible_ws)
        w_sample = self._possible_ws[k_chosen]  # (n_alpha, n_dim_out)
        new_phi = tt.set_subtensor(
            self._phi[self._alpha],
            new_phi_alpha)  # (n_dim_in, n_dim_out, n_possible_ws)
        w_new = tt.set_subtensor(self._w[self._alpha],
                                 w_sample)  # (n_dim_in, n_dim_out)
        add_update(self._w, w_new)
        add_update(self._phi, new_phi)
        self._add_alpha_update()
Example #33
    def forward_pass_and_state(self, x, count_ops=False):
        # s = quantize(x, mode = self.fwd_quantizer, shape = (self.minibatch_size, self.n_in))
        # s = pd_encode(x, kp=self.kp, kd=self.kd, quantization=self.fwd_quantizer, shape = (self.minibatch_size, self.n_in))
        s = self.encdec.encode(x, shape=(self.minibatch_size, self.n_in))

        # if self.n_in==784:
        #     tdbplot(s.reshape((28, 28)), 's')

        # pre_act = pd_decode(s.dot(self.w), kp=self.kp, kd=self.kd, shape= (self.minibatch_size, self.n_out)) + self.b
        pre_act = self.encdec.decode(
            s.dot(self.w), shape=(self.minibatch_size, self.n_out)) + self.b

        if count_ops:
            add_update(self.fwd_op_count,
                       self.fwd_op_count +
                       abs(s).sum().astype('int64') * self.n_out,
                       accumulate=True)

        # pre_act = s.dot(self.w) + self.b
        out = compute_activation(pre_act, activation_name=self.nonlinearity)
        return out, (x, s, pre_act)
Example #34
def matrix_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes, epsilon=1e-7):
    """
    :param xs:
    :param es:
    :param kp_x:
    :param kd_x:
    :param kp_e:
    :param kd_e:
    :param shapes:
    :param epsilon:
    :return:
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    v1 = create_shared_variable(np.zeros((n_samples, n_in, n_out)))
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)
    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))

    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    xr_decayed = xr*rx
    er_decayed = er*re
    spikes = tt.bitwise_or(x_spikes[:, :, None], e_spikes[:, None, :])
    v2 = xr_decayed[:, :, None]*er_decayed[:, None, :]
    dws = (spikes*(v2-v1))/(rx*re-1)
    new_xr = xr_decayed + xs/(kp_x+kd_x)
    new_er = er_decayed + es/(kp_e+kd_e)

    add_update(v1, tt.switch(spikes, new_xr[:, :, None]*new_er[:, None, :], v1))
    add_update(xr, new_xr)
    add_update(er, new_er)

    return dws.sum(axis=0)
Example #35
    def train(self, x, target):

        out = self.predict(x)
        delta_w = x.T.dot(target - out)
        delta_b = (target - out).sum(axis = 0)

        recon = self.backward(out)
        delta_w_rev = out.T.dot(x - recon)
        delta_b_rev = (x - recon).sum(axis = 0)

        add_update(self.w, self.w+delta_w)
        add_update(self.w_rev, self.w_rev+delta_w_rev)
        add_update(self.b, self.b+delta_b)
        add_update(self.b_rev, self.b_rev+delta_b_rev)
Example #36
def past_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    # TODO: Make this actually use sparsity, one of these days.
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)

    tx_last = create_shared_variable(np.zeros((n_samples, n_in)) + 1)
    te_last = create_shared_variable(np.zeros((n_samples, n_out)) + 1)
    x_last = create_shared_variable(np.zeros((n_samples, n_in)))
    e_last = create_shared_variable(np.zeros((n_samples, n_out)))

    t_last = tt.minimum(tx_last[:, :, None], te_last[:, None, :])
    x_spikes = tt.neq(xs, 0)
    dw_potentials = x_last[:, :, None] * e_last[:, None, :] * \
            rx**(tx_last[:, :, None]-t_last) \
            * re**(te_last[:, None, :]-t_last) \
            * geoseries_sum(rx*re, t_end=t_last, t_start=1)
    e_spikes = tt.neq(es, 0)
    dws = (x_spikes[:, :, None] + e_spikes[:, None, :] - x_spikes[:, :, None] *
           e_spikes[:, None, :]) * dw_potentials  # (n_samples, n_in, n_out)

    add_update(
        x_last,
        tt.switch(x_spikes, x_last * rx**tx_last + xs / as_floatx(kd_x),
                  x_last))
    add_update(
        e_last,
        tt.switch(e_spikes, e_last * rx**te_last + es / as_floatx(kd_e),
                  e_last))
    add_update(tx_last, tt.switch(x_spikes, 1, tx_last + 1))
    add_update(te_last, tt.switch(e_spikes, 1, te_last + 1))
    return dws.sum(axis=0)
Example #37
 def _update_param(self, param, gradient):
     mom1 = theano.shared(np.zeros_like(param.get_value()))
     mom2 = theano.shared(np.zeros_like(param.get_value()))
     mom1_new = mom1 + self._beta_1 * (gradient - mom1)
     mom2_new = tt.maximum(abs(gradient) + self._eps, (1. - self._beta_2) * mom2)
     new_param = param - self._alpha * mom1_new / mom2_new
     add_update(param, new_param)
     add_update(mom1, mom1_new)
     add_update(mom2, mom2_new)
Example #38
def past_weight_grad_calculator_reloaded(xs, es, kp_x, kd_x, kp_e, kd_e,
                                         shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    # TODO: RESOLVE INSTABILITY ISSUE
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)

    tx_last = create_shared_variable(np.zeros((n_samples, n_in)))
    te_last = create_shared_variable(np.zeros((n_samples, n_out)))
    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))

    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    t_last = tt.maximum(tx_last[:, :, None], te_last[:, None, :])
    sum_to_last = geoseries_sum(
        rx * re, t_start=t_last, t_end=0
    )  # Wasteful, since most of this is multiplied by zeros later, but for now it doesn't matter

    spikes = tt.bitwise_or(x_spikes[:, :, None], e_spikes[:, None, :])
    dw_es = (
        xr[:, :, None] * er[:, None, :] * spikes
    ) * sum_to_last  # PROBLEM HERE!!!! Can be a very small number times a very large number
    # dw_es = (xr[:, :, None]*(x_spikes[:, :, None]-x_spikes[:, :, None]*e_spikes[:, None, :]) * er[:, None, :] + xr[:, :, None] * (er*e_spikes)[:, None, :]) * sum_to_last
    # dw_es = (xr[:, :, None]*(x_spikes[:, :, None]-x_spikes[:, :, None]*e_spikes[:, None, :]) * er[:, None, :] + xr[:, :, None] * (er*e_spikes)[:, None, :]) * sum_to_last
    add_update(xr, xr * rx + xs / (kp_x + kd_x))
    add_update(er, er * re + es / (kp_e + kd_e))
    add_update(tx_last, tt.switch(x_spikes, 0, tx_last - 1))
    add_update(te_last, tt.switch(e_spikes, 0, te_last - 1))

    return dw_es.sum(axis=0)
Example #39
 def _update_param(self, param, gradient):
     mom1 = theano.shared(np.zeros_like(param.get_value()))
     mom2 = theano.shared(np.zeros_like(param.get_value()))
     mom1_new = mom1 + self._beta_1 * (gradient - mom1)
     mom2_new = tt.maximum(
         abs(gradient) + self._eps, (1. - self._beta_2) * mom2)
     new_param = param - self._alpha * mom1_new / mom2_new
     add_update(param, new_param)
     add_update(mom1, mom1_new)
     add_update(mom2, mom2_new)
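
Examples #37 and #39 track a moving average of the gradient together with an infinity-norm-style second moment (as in Adamax). A NumPy sketch of the same step, with alpha, beta_1, beta_2 and eps standing in for the corresponding attributes (illustrative names):

import numpy as np

def adamax_like_step(param, grad, mom1, mom2, alpha=1e-3, beta_1=0.1, beta_2=0.001, eps=1e-8):
    mom1 = mom1 + beta_1 * (grad - mom1)                       # moving average of the gradient
    mom2 = np.maximum(np.abs(grad) + eps, (1. - beta_2) * mom2)  # decayed infinity-norm estimate
    param = param - alpha * mom1 / mom2
    return param, mom1, mom2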
Example #40
    def train(x, y):
        p_wa = compute_p_wa(w, x, y, alpha)

        # Now, the herding part... here're the 3 lines from the minipaper
        phi_alpha = phi[alpha] + p_wa
        w_sample = phi_alpha > 0.5
        new_phi_alpha = phi_alpha - w_sample
        add_update(w, tt.set_subtensor(w[alpha], w_sample))
        add_update(phi, tt.set_subtensor(phi[alpha], new_phi_alpha))
        add_update(alpha, (alpha+1) % n_dim_in)
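
The three highlighted lines in Example #40 are binary herding: accumulate the probability into phi, threshold at 0.5 to choose the sample, and subtract the sample so the residual carries forward. In NumPy terms (illustrative names):

import numpy as np

def herded_binary_step(phi_alpha, p_wa):
    # phi_alpha: residuals for the selected row; p_wa: per-weight probabilities of being 1.
    phi_alpha = phi_alpha + p_wa
    w_sample = (phi_alpha > 0.5).astype(phi_alpha.dtype)
    phi_alpha = phi_alpha - w_sample
    return w_sample, phi_alpha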
Example #41
    def _update_param(self, param, gradient):
        # Initialize variables
        i = create_shared_variable(0.)
        m = theano.shared(param.get_value() * 0.)
        v = theano.shared(param.get_value() * 0.)

        # Recompute values
        i_t = i + 1.
        fix1 = 1. - (1. - self.beta_1)**i_t
        fix2 = 1. - (1. - self.beta_2)**i_t
        lr_t = self.alpha * (tt.sqrt(fix2) / fix1)
        m_t = (self.beta_1 * gradient) + ((1. - self.beta_1) * m)
        v_t = (self.beta_2 * tt.sqr(gradient)) + ((1. - self.beta_2) * v)
        g_t = m_t / (tt.sqrt(v_t) + self.eps)
        p_t = param - (lr_t * g_t)
        add_update(param, p_t)
        add_update(m, m_t)
        add_update(v, v_t)
        add_update(i, i_t)
Example #42
    def _update_param(self, param, gradient):
        # Initialize variables
        i = create_shared_variable(0.)
        m = theano.shared(param.get_value() * 0.)
        v = theano.shared(param.get_value() * 0.)

        # Recompute values
        i_t = i + 1.
        fix1 = 1. - (1. - self.beta_1)**i_t
        fix2 = 1. - (1. - self.beta_2)**i_t
        lr_t = self.alpha * (tt.sqrt(fix2) / fix1)
        m_t = (self.beta_1 * gradient) + ((1. - self.beta_1) * m)
        v_t = (self.beta_2 * tt.sqr(gradient)) + ((1. - self.beta_2) * v)
        g_t = m_t / (tt.sqrt(v_t) + self.eps)
        p_t = param - (lr_t * g_t)
        add_update(param, p_t)
        add_update(m, m_t)
        add_update(v, v_t)
        add_update(i, i_t)
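
Examples #41 and #42 are a bias-corrected Adam step, written with beta_1 and beta_2 as one minus the usual Adam decay rates. The same computation in NumPy (hyperparameter names mirror the attributes in the example and are illustrative):

import numpy as np

def adam_step(param, grad, m, v, i, alpha=1e-3, beta_1=0.1, beta_2=0.001, eps=1e-8):
    i = i + 1.
    fix1 = 1. - (1. - beta_1) ** i          # bias correction for the first moment
    fix2 = 1. - (1. - beta_2) ** i          # bias correction for the second moment
    lr = alpha * np.sqrt(fix2) / fix1
    m = beta_1 * grad + (1. - beta_1) * m
    v = beta_2 * grad ** 2 + (1. - beta_2) * v
    param = param - lr * m / (np.sqrt(v) + eps)
    return param, m, v, i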
Example #43
def past_weight_grad_calculator2(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    This attempt never really got off the ground. It doesn't work.
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)

    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))

    # xr_new = xr*rx + xs/(kp_x+kd_x)
    # er_new = er*re + es/(kp_e+kd_e)

    arr = rx*re/(1-rx*re)

    xr_new = xr*arr + xs/(kp_x+kd_x)
    er_new = er*arr + es/(kp_e+kd_e)

    xsum = create_shared_variable(np.zeros((n_samples, n_in)))
    esum = create_shared_variable(np.zeros((n_samples, n_out)))

    xsum_new = xsum+xr_new
    esum_new = esum+er_new

    x_nospikes = tt.eq(xs, 0)
    e_nospikes = tt.eq(es, 0)

    dw = xs.T.dot(esum_new) + xsum_new.T.dot(es)

    add_update(xr, xr_new)
    add_update(er, er_new)
    add_update(xsum, xsum_new*x_nospikes)
    add_update(esum, esum_new*e_nospikes)

    return xs.T.dot(er) + xr.T.dot(es)
    # return xr.T.dot(er)
    # return dw
Example #44
def past_weight_grad_calculator_reloaded(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    # TODO: RESOLVE INSTABILITY ISSUE
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)

    tx_last = create_shared_variable(np.zeros((n_samples, n_in)))
    te_last = create_shared_variable(np.zeros((n_samples, n_out)))
    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))

    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    t_last = tt.maximum(tx_last[:, :, None], te_last[:, None, :])
    sum_to_last = geoseries_sum(rx*re, t_start=t_last, t_end=0)  # Wasteful, since most of this is multiplied by zeros later, but for now it doesn't matter

    spikes = tt.bitwise_or(x_spikes[:, :, None], e_spikes[:, None, :])
    dw_es = (xr[:, :, None]*er[:, None, :]*spikes)*sum_to_last  # PROBLEM HERE!!!! Can be a very small number times a very large number
    # dw_es = (xr[:, :, None]*(x_spikes[:, :, None]-x_spikes[:, :, None]*e_spikes[:, None, :]) * er[:, None, :] + xr[:, :, None] * (er*e_spikes)[:, None, :]) * sum_to_last
    # dw_es = (xr[:, :, None]*(x_spikes[:, :, None]-x_spikes[:, :, None]*e_spikes[:, None, :]) * er[:, None, :] + xr[:, :, None] * (er*e_spikes)[:, None, :]) * sum_to_last
    add_update(xr, xr*rx + xs/(kp_x+kd_x))
    add_update(er, er*re + es/(kp_e+kd_e))
    add_update(tx_last, tt.switch(x_spikes, 0, tx_last-1))
    add_update(te_last, tt.switch(e_spikes, 0, te_last-1))

    return dw_es.sum(axis=0)
Example #45
def past_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    # TODO: Make this actually use sparsity, one of these days.
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)

    tx_last = create_shared_variable(np.zeros((n_samples, n_in))+1)
    te_last = create_shared_variable(np.zeros((n_samples, n_out))+1)
    x_last = create_shared_variable(np.zeros((n_samples, n_in)))
    e_last = create_shared_variable(np.zeros((n_samples, n_out)))

    t_last = tt.minimum(tx_last[:, :, None], te_last[:, None, :])
    x_spikes = tt.neq(xs, 0)
    dw_potentials = x_last[:, :, None] * e_last[:, None, :] * \
            rx**(tx_last[:, :, None]-t_last) \
            * re**(te_last[:, None, :]-t_last) \
            * geoseries_sum(rx*re, t_end=t_last, t_start=1)
    e_spikes = tt.neq(es, 0)
    dws = (x_spikes[:, :, None]+e_spikes[:, None, :]-x_spikes[:, :, None]*e_spikes[:, None, :])*dw_potentials  # (n_samples, n_in, n_out)

    add_update(x_last, tt.switch(x_spikes, x_last*rx**tx_last + xs/as_floatx(kd_x), x_last))
    add_update(e_last, tt.switch(e_spikes, e_last*rx**te_last + es/as_floatx(kd_e), e_last))
    add_update(tx_last, tt.switch(x_spikes, 1, tx_last+1))
    add_update(te_last, tt.switch(e_spikes, 1, te_last+1))
    return dws.sum(axis=0)
Example #46
def matrix_weight_grad_calculator(xs,
                                  es,
                                  kp_x,
                                  kd_x,
                                  kp_e,
                                  kd_e,
                                  shapes,
                                  epsilon=1e-7):
    """
    :param xs:
    :param es:
    :param kp_x:
    :param kd_x:
    :param kp_e:
    :param kd_e:
    :param shapes:
    :param epsilon:
    :return:
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    v1 = create_shared_variable(np.zeros((n_samples, n_in, n_out)))
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)
    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))

    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    xr_decayed = xr * rx
    er_decayed = er * re
    spikes = tt.bitwise_or(x_spikes[:, :, None], e_spikes[:, None, :])
    v2 = xr_decayed[:, :, None] * er_decayed[:, None, :]
    dws = (spikes * (v2 - v1)) / (rx * re - 1)
    new_xr = xr_decayed + xs / (kp_x + kd_x)
    new_er = er_decayed + es / (kp_e + kd_e)

    add_update(v1,
               tt.switch(spikes, new_xr[:, :, None] * new_er[:, None, :], v1))
    add_update(xr, new_xr)
    add_update(er, new_er)

    return dws.sum(axis=0)
Example #47
    def backward_pass(self, state, grad, cost = None, count_ops = False):
        """
        :param grad: An integer (n_samples, n_dim_out) gradient estimate
        :return: (delta, param_gradients) Where:
            delta: A (n_samples, n_dim_in) integer gradient estimate
        """

        assert (grad is None) != (cost is None), "You can either pass grad or cost"

        ap, ap_q, z = state


        if cost is None:
            filters = tt.grad(compute_activation(z, activation_name=self.nonlinearity).sum(), wrt=z)
            grad_z = filters*grad
        elif grad is None:
            grad_z = tt.grad(cost, wrt=z)

        # sb = quantize(pre_act_grad, mode=self.back_quantizer, shape=(self.minibatch_size, self.n_out))
        # grad_z_q = pd_encode(grad_z, kp=self.kp_back, kd=self.kd_back, quantization=self.back_quantizer, shape=(self.minibatch_size, self.n_out))
        grad_z_q = self.encdec_back.encode(grad_z, shape=(self.minibatch_size, self.n_out))

        if count_ops:
            add_update(self.back_op_count, self.back_op_count+abs(grad_z_q).sum().astype('int64')*self.n_in, accumulate=True)

        # grad_ap = pd_decode(grad_z_q.dot(self.w.T), kp=self.kp_back, kd=self.kd_back, shape=(self.minibatch_size, self.n_in))
        grad_ap = self.encdec_back.decode(grad_z_q.dot(self.w.T), shape=(self.minibatch_size, self.n_in))

        if self.grad_calc in ('true', 'xx', 'recon'):  # Dense op count
            add_update(self.update_op_count, self.back_op_count+self.minibatch_size*self.n_in*self.n_out)
        elif self.grad_calc in ('future', 'future-true', 'past', 'past_step', 'past_reloaded', 'past_matrix'):  # Sparse op count
            add_update(self.update_op_count, self.update_op_count+abs(ap_q).sum().astype('int64')*self.n_out + abs(grad_z_q).sum().astype('int64')*self.n_in)
        else:
            raise NotImplementedError('No op-count method for {}'.format(self.grad_calc))

        w_grad = self._get_past_gradient(ap, grad_z, ap_q, grad_z_q, grad_calc=self.grad_calc)
        # tdbplot(w_grad, self.grad_calc)
        # w_reloaded = self._get_past_gradient(ap, grad_z, ap_q, grad_z_q, grad_calc='past_reloaded')
        # tdbplot(w_reloaded, 'reloaded')

        b_grad = grad_z_q.sum(axis=0) if self.grad_calc[-1]=='s' else grad_z.sum(axis=0)
        return grad_ap, [w_grad, b_grad]
Example #48
 def _update_param(self, param, gradient):
     add_update(param, param - self._eta*gradient + 2*tt.sqrt(self._eta)*self._rng.normal(size = param.ishape))
Example #49
 def update(self):
     add_update(self._var, self._var+1)
Example #50
 def __call__(self):
     counter = theano.shared(np.zeros((), dtype = 'int')+self._initial_value)
     add_update(counter, counter+1)
     return counter
Example #51
 def lying_function_that_says_its_stateless_but_has_state():
     add_update(var, var+1)
     return var+1
Example #52
 def honest_function_that_actually_updates():
     add_update(var, var+1)
Example #53
 def running_sum(x):
     s = create_shared_variable(0.)
     new_s = s+x
     add_update(s, new_s)
     return new_s
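
running_sum is the smallest useful illustration of the pattern: the shared accumulator s is updated as a side effect each time the compiled function is called. A plain-Theano equivalent for comparison (illustrative names, with the update passed explicitly instead of via add_update):

import numpy as np
import theano
import theano.tensor as tt

x = tt.scalar('x')
s = theano.shared(np.array(0., dtype=theano.config.floatX), name='s')
new_s = s + x
running_sum = theano.function([x], new_s, updates=[(s, new_s)],
                              allow_input_downcast=True)
print([float(running_sum(v)) for v in (1., 2., 3.)])  # [1.0, 3.0, 6.0]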
Example #54
 def count(self):
     add_update(self._count_var, self._count_var+1)
     return self._count_var
Example #55
 def train(self, x, y):
     p_wa = self.compute_p_wa(self._w, x, y, self._alpha, self._possible_ws)  # (n_alpha, n_dim_out, n_possible_ws)
     w_sample = sample_categorical(self._rng, p_wa, values = self._possible_ws)
     w_new = tt.set_subtensor(self._w[self._alpha], w_sample)  # (n_dim_in, n_dim_out)
     add_update(self._w, w_new)
     self._add_alpha_update()
Example #56
 def _add_alpha_update(self):
     new_alpha = (self._alpha+self._n_alpha) % self._w.shape[0] \
         if self._alpha_update_policy == 'sequential' else \
         self._rng.choice(a=self._w.shape[0], size = (self._n_alpha, ), replace = False).reshape([-1])  # Reshape is for some reason necessary when n_alpha=1
     add_update(self._alpha, new_alpha)
Example #57
 def _update_param(self, param, gradient):
     add_update(param, param - gradient)
Example #58
 def _update_param(self, param, gradient):
     add_update(param, param - self._eta * gradient)