Example 1
 def from_init(cls,
               w_init,
               layer_sizes,
               rng=None,
               last_layer_zero=False,
               **init_args):
     """
     :param w_init: Can be:
         - A scalar, in which case w_init will be interpreted as the standard deviation for the Normally distributed initial weights.
         - A function which accepts the shape of the weight matrix as separate arguments.
     :param layer_sizes: A list of layer sizes, including the input layer
     :param rng: A random number generator or seed to use for drawing weights (only when w_init is a scalar)
     :param last_layer_zero: There is no need for the last layer to have initial weights.  If this is True, the weights of
         the last layer will all be zero.
     :param **init_args: See MultiLayerPerceptron constructor
     """
     if hasattr(w_init, '__call__'):
         assert rng is None, "If w_init is callable, the random number generator (rng) doesn't do anything, and shouldn't be specified."
     else:
         rng = get_rng(rng)
         w_init_mag = w_init
         w_init = lambda n_in, n_out: w_init_mag * rng.randn(n_in, n_out)
     weights = [
         w_init(n_in, n_out)
         for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])
     ]
     if last_layer_zero:
         weights[-1][:] = 0
     return cls(weights=weights, **init_args)
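For reference, the weight-list construction above reduces to the following numpy-only pattern when w_init is a scalar (a minimal sketch; MultiLayerPerceptron and get_rng are project helpers not reproduced here):

import numpy as np

rng = np.random.RandomState(0)
layer_sizes, w_init_mag = [784, 256, 10], 0.01
weights = [w_init_mag * rng.randn(n_in, n_out)
           for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])]
print([w.shape for w in weights])  # [(784, 256), (256, 10)]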
Example 2
def initialize_weight_matrix(n_in,
                             n_out,
                             mag='xavier',
                             base_dist='normal',
                             rng=None):
    """
    Initialize a weight matrix
    :param n_in: Number of input units
    :param n_out: Number of output units
    :param mag: The magnitude, or a string identifying how to calculate the magnitude.
        String options can be:
            'xavier-forward' - Best for preserving the variance of a linear, tanh, or sigmoidal network across layers.
            'xavier-both' - A compromise between preserving the variance of the forward and backward passes.
            'xavier-relu' - Best for preserving the variance of the forward pass in a ReLU net.
    :param base_dist: 'normal' or 'uniform', or a function taking (n_in, n_out) and returning a (n_in, n_out) array
    :param rng: Random number generator or seed
    :return: A shape (n_in, n_out) initial weight matrix.
    """
    rng = get_rng(rng)

    w_base = rng.randn(n_in, n_out) if base_dist == 'normal' else \
        (rng.rand(n_in, n_out) - 0.5)*np.sqrt(12) if base_dist=='uniform' else \
        bad_value(base_dist)

    mag_number = \
        np.sqrt(2./(n_in+n_out)) if mag in ('xavier', 'xavier-both') else \
        np.sqrt(1./n_in) if mag=='xavier-forward' else \
        np.sqrt(2./n_in) if mag=='xavier-relu' else \
        mag if isinstance(mag, numbers.Real) else \
        bad_value(mag)

    return w_base * mag_number
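A self-contained sketch of the 'xavier'/'xavier-both' scaling used above, with np.random standing in for the project's get_rng helper; the printed standard deviation should land near sqrt(2/(n_in+n_out)):

import numpy as np

def xavier_normal(n_in, n_out, rng=np.random):
    # 'xavier-both' magnitude: std = sqrt(2 / (n_in + n_out)), which roughly
    # preserves activation variance in both the forward and backward passes.
    return np.sqrt(2. / (n_in + n_out)) * rng.randn(n_in, n_out)

w = xavier_normal(300, 100)
print(w.shape, round(float(w.std()), 4))  # ~ sqrt(2/400) = 0.0707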
Example 3
def lowpass_random(n_samples,
                   cutoff,
                   n_dim=None,
                   rng=None,
                   normalize=False,
                   slope=0):
    """
    Return a random lowpass-filtered signal.
    :param n_samples: Number of samples in the signal
    :param cutoff: Cutoff frequency, in the range 0 (pure DC) to 1 (sample frequency)
    :param n_dim: Optional number of dimensions (None gives a 1-D signal)
    :param rng: A random number generator or seed
    :param normalize: If True, divide by the standard deviation; if a (lower, upper) tuple, rescale into that range
    :param slope: Slope of an optional linear ramp added to the signal
    :return: A (n_samples,) or (n_samples, n_dim) lowpass-filtered random signal
    """
    rng = get_rng(rng)
    assert 0 <= cutoff <= 1, "Cutoff must be in the range 0 (pure DC) to 1 (sample frequency)"
    base_signal = rng.randn(n_samples) if n_dim is None else rng.randn(
        n_samples, n_dim)
    lowpass_signal = lowpass(base_signal, cutoff)
    if normalize is True:
        lowpass_signal = lowpass_signal / np.std(lowpass_signal)
    elif isinstance(normalize, tuple):
        lower, upper = normalize
        minsig, maxsig = np.min(lowpass_signal, axis=0), np.max(lowpass_signal,
                                                                axis=0)
        lowpass_signal = ((lowpass_signal - minsig) /
                          (maxsig - minsig)) * (upper - lower) + lower
    if slope != 0:
        ramp = slope * np.arange(len(lowpass_signal))
        lowpass_signal = lowpass_signal + (ramp
                                           if n_dim is None else ramp[:, None])
    return lowpass_signal
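The lowpass helper used above is not shown in these examples; a self-contained stand-in built on scipy's Butterworth filter (an assumption about what lowpass does, not the project's implementation) might look like this. Note that scipy's cutoff is relative to the Nyquist frequency rather than the sample frequency:

import numpy as np
from scipy.signal import butter, filtfilt

def lowpass_random_standalone(n_samples, cutoff, seed=None):
    # White noise pushed through a zero-phase 2nd-order Butterworth low-pass filter.
    rng = np.random.RandomState(seed)
    b, a = butter(2, cutoff)
    return filtfilt(b, a, rng.randn(n_samples))

sig = lowpass_random_standalone(500, 0.05, seed=0)
print(sig.shape, round(float(sig.std()), 3))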
Example 4
def create_maxout_network(layer_sizes,
                          maxout_widths,
                          w_init,
                          output_activation='maxout',
                          rng=None,
                          **other_args):

    rng = get_rng(rng)

    n_expected_maxout_widths = len(layer_sizes) - 1 if output_activation == 'maxout' else len(layer_sizes) - 2
    if isinstance(maxout_widths, (list, tuple)):
        assert len(maxout_widths) == n_expected_maxout_widths
    else:
        maxout_widths = [maxout_widths] * n_expected_maxout_widths

    weights = [
        w_init * rng.randn(n_maps, n_in, n_out) for n_maps, n_in, n_out in zip(
            maxout_widths, layer_sizes[:-1], layer_sizes[1:])
    ]
    # Note... we're intentionally starting the zip with maxout widths because we know it may be one element shorter than the layer-sizes
    if output_activation != 'maxout':
        weights.append(w_init * rng.randn(layer_sizes[-2], layer_sizes[-1]))
    return MultiLayerPerceptron(weights=weights,
                                hidden_activation='maxout',
                                output_activation=output_activation,
                                **other_args)
Example 5
def initialize_network_params(layer_sizes, mag='xavier-both', base_dist='normal', include_biases = True, scale=1., rng=None):
    """
    Initialize parameters for a fully-connected neural network.

    :param layer_sizes: A list of integers indicating layer sizes (including that of the input layer)
    :param mag: The standard deviation, or a string identifying a method for selecting the standard deviation.
        String options can be:
            'xavier-forward' - Best for preserving the variance of a linear, tanh, or sigmoidal network across layers.
            'xavier-both' - A compromise between preserving the variance of the forward and backward passes.
            'xavier-relu' - Best for preserving the variance of the forward pass in a ReLU net.
    :param base_dist: 'normal' or 'uniform', or a function taking (n_in, n_out) and returning a (n_in, n_out) array
    :param include_biases: Also create initial biases.
    :param scale: An additional scale factor to multiply the weights by.
    :param rng: A random number generator or seed
    :return: A list of 2-tuples of (weight, bias) parameters (if include_biases is True) otherwise a list of weight matrices.

    Note: To get the weights/biases in separate lists, simply go:
        weights, biases = zip(*initialize_network_params(...))

    Note: See http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-initialization
    For a good explanation of the 'xavier' initialization schemes.
    """
    rng = get_rng(rng)
    ws = [initialize_weight_matrix(n_in, n_out, mag=mag, base_dist=base_dist, scale=scale, rng=rng) for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])]
    if include_biases:
        bs = [np.zeros(n_out) for n_out in layer_sizes[1:]]
        return zip(ws, bs)
    else:
        return ws
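The note in the docstring above about unzipping weights and biases can be illustrated with a numpy-only version of the same pattern (initialize_weight_matrix and get_rng are project helpers, so this sketch inlines a simple Xavier draw):

import numpy as np

def init_params(layer_sizes, rng=np.random):
    # One (n_in, n_out) weight matrix and one zero bias vector per consecutive pair of layers.
    ws = [np.sqrt(2. / (n_in + n_out)) * rng.randn(n_in, n_out)
          for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])]
    bs = [np.zeros(n_out) for n_out in layer_sizes[1:]]
    return list(zip(ws, bs))

weights, biases = zip(*init_params([784, 200, 10]))
print([w.shape for w in weights], [b.shape for b in biases])  # [(784, 200), (200, 10)] [(200,), (10,)]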
Example 6
def demo_settling_dynamics(symmetric=False,
                           n_hidden=50,
                           n_out=3,
                           input_influence=0.01,
                           learning_rate=0.0001,
                           cut_time=None,
                           minibatch_size=1,
                           decay=0.05,
                           scale=.4,
                           hidden_act='tanh',
                           output_act='lin',
                           draw_every=10,
                           n_steps=10000,
                           seed=124):
    """
    Here we use Predictive Coding and compare the convergence of a predictive-coded network to one without.
    """

    rng = get_rng(seed)
    net_d = Network.from_init(symmetric=symmetric,
                              n_hidden=n_hidden,
                              n_out=n_out,
                              scale=scale,
                              fh=hidden_act,
                              fx=output_act,
                              decay=decay,
                              rng=rng)
    state_d = net_d.init_state(minibatch_size=minibatch_size)

    net_l = Network.from_init(symmetric=symmetric,
                              n_hidden=n_hidden,
                              n_out=n_out,
                              scale=scale,
                              fh=hidden_act,
                              fx=output_act,
                              decay=decay,
                              rng=rng,
                              input_influence=input_influence,
                              learning_rate=learning_rate)
    state_l = net_l.init_state(minibatch_size=minibatch_size)

    sp = Speedometer()
    for t in range(n_steps):

        error = (state_d.x[0] - state_l.x[0]).mean()
        with hold_dbplots(draw_every=draw_every):
            dbplot(state_d.h[0], 'hd')
            dbplot(state_d.x[0], 'xd')
            dbplot(state_l.h[0], 'hl')
            dbplot(state_l.x[0], 'xl')
            dbplot(np.array([abs(net_l.w_hx).mean()]), 'wmag')
            dbplot(error, 'error')

        state_d = net_d.update(state_d)
        state_l = net_l.update(
            state_l,
            inp=state_d.x if cut_time is None or t < cut_time else None)

        if t % 100 == 0:
            print(f'Rate: {sp(t+1)} iter/s')
Example 7
def proportional_random_assignment(length, split, rng):
    """
    Generate an integer array of the given length, with elements randomly assigned to 0...len(split), with
    frequency of elements with value i proportional to split[i].

    This is useful for splitting training/test sets.  e.g.

        n_samples = 1000
        x = np.random.randn(n_samples, 4)
        y = np.random.randn(n_samples)
        subsets = proportional_random_assignment(n_samples, split=0.7, rng=1234)
        x_train = x[subsets==0]
        y_train = y[subsets==0]
        x_test = x[subsets==1]
        y_test = y[subsets==1]

    :param length: The length of the output array
    :param split: Either a list of ratios to assign to each group (the ratios must sum to at most 1), or a single
        float in (0, 1), indicating that we split into 2 groups.
    :param rng: A random number generator or seed.
    :return: An integer array.
    """
    rng = get_rng(rng)
    if isinstance(split, float):
        split = [split]
    assert 0<=np.sum(split)<=1, "The sum of elements in split: {} must be in [0, 1].  Got {}".format(split, np.sum(split))
    arr = np.zeros(length, dtype=int)
    cut_points = np.concatenate([np.round(np.cumsum(split)*length).astype(int), [length]])
    scrambled_indices = rng.permutation(length)
    for i, (c_start, c_end) in enumerate(zip(cut_points[:-1], cut_points[1:])):
        arr[scrambled_indices[c_start:c_end]] = i+1  # Note we skip zero since arrays already inited to 0
    return arr
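A quick, self-contained check of the proportions produced by the scheme above (numpy only, with np.random.RandomState in place of the project's get_rng):

import numpy as np

rng = np.random.RandomState(1234)
length, split = 1000, [0.7]
cuts = np.concatenate([np.round(np.cumsum(split) * length).astype(int), [length]])
arr = np.zeros(length, dtype=int)
idx = rng.permutation(length)
for i, (a, b) in enumerate(zip(cuts[:-1], cuts[1:])):
    arr[idx[a:b]] = i + 1
print(np.bincount(arr))  # [700 300]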
Example 8
def initialize_weight_matrix(n_in, n_out, mag='xavier', base_dist='normal', scale=1., rng=None):
    """
    Initialize a weight matrix
    :param n_in: Number of input units
    :param n_out: Number of output units
    :param mag: The magnitude, or a string identifying how to calculate the magnitude.
        String options can be:
            'xavier-forward' - Best for preserving the variance of a linear, tanh, or sigmoidal network across layers.
            'xavier-both' - A compromise between preserving the variance of the forward and backward passes.
            'xavier-relu' - Best for preserving the variance of the forward pass in a ReLU net.
    :param base_dist: 'normal' or 'uniform', or a function taking (n_in, n_out) and returning a (n_in, n_out) array
    :param scale: An additional scale factor to multiply the weight magnitude by.
    :param rng: Random number generator or seed
    :return: A shape (n_in, n_out) initial weight matrix.
    """
    rng = get_rng(rng)

    w_base = rng.randn(n_in, n_out) if base_dist == 'normal' else \
        (rng.rand(n_in, n_out) - 0.5)*np.sqrt(12) if base_dist=='uniform' else \
        bad_value(base_dist)

    mag_number = \
        np.sqrt(2./(n_in+n_out)) if mag in ('xavier', 'xavier-both') else \
        np.sqrt(1./n_in) if mag=='xavier-forward' else \
        np.sqrt(2./n_in) if mag=='xavier-relu' else \
        mag if isinstance(mag, numbers.Real) else \
        bad_value(mag)

    return w_base * (mag_number*scale)
Example 9
def lowpass_random(n_samples,
                   cutoff,
                   n_dim=None,
                   rng=None,
                   normalize=False,
                   slope=0):
    """
    Return a random lowpass-filtered signal.
    :param n_samples: Number of samples in the signal
    :param cutoff: Cutoff frequency, in the range 0 (pure DC) to 1 (sample frequency)
    :param n_dim: Optional number of dimensions (None gives a 1-D signal)
    :param rng: A random number generator or seed
    :param normalize: If True, divide the signal by its standard deviation
    :param slope: Slope of an optional linear ramp added to the signal
    :return: A (n_samples,) or (n_samples, n_dim) lowpass-filtered random signal
    """
    rng = get_rng(rng)
    assert 0 <= cutoff <= 1, "Cutoff must be in the range 0 (pure DC) to 1 (sample frequency)"
    base_signal = rng.randn(n_samples) if n_dim is None else rng.randn(
        n_samples, n_dim)
    lowpass_signal = lowpass(base_signal, cutoff)
    if normalize:
        lowpass_signal = lowpass_signal / np.std(lowpass_signal)
    if slope != 0:
        ramp = slope * np.arange(len(lowpass_signal))
        lowpass_signal = lowpass_signal + (ramp
                                           if n_dim is None else ramp[:, None])
    return lowpass_signal
Example 10
def initialize_network_params(layer_sizes, mag='xavier-both', base_dist='normal', last_layer_zero = False, include_biases = True, scale=1., rng=None):
    """
    Initialize parameters for a fully-connected neural network.

    :param layer_sizes: A list of integers indicating layer sizes (including that of the input layer)
    :param mag: The standard deviation, or a string identifying a method for selecting the standard deviation.
        String options can be:
            'xavier-forward' - Best for preserving the variance of a linear, tanh, or sigmoidal network across layers.
            'xavier-both' - A compromise between preserving the variance of the forward and backward passes.
            'xavier-relu' - Best for preserving the variance of the forward pass in a ReLU net.
    :param base_dist: 'normal' or 'uniform', or a function taking (n_in, n_out) and returning a (n_in, n_out) array
    :param last_layer_zero: If True, set the initial weights of the last layer to zero.
    :param include_biases: Also create initial biases.
    :param scale: An additional scale factor to multiply the weights by.
    :param rng: A random number generator or seed
    :return: A list of 2-tuples of (weight, bias) parameters (if include_biases is True) otherwise a list of weight matrices.

    Note: To get the weights/biases in separate lists, simply go:
        weights, biases = zip(*initialize_network_params(...))

    Note: See http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-initialization
    For a good explanation of the 'xavier' initialization schemes.
    """
    rng = get_rng(rng)
    ws = [initialize_weight_matrix(n_in, n_out, mag=mag, base_dist=base_dist, scale=scale, rng=rng) for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])]
    if last_layer_zero:
        ws[-1][:] = 0
    if include_biases:
        bs = [np.zeros(n_out) for n_out in layer_sizes[1:]]
        return zip(ws, bs)
    else:
        return ws
Example 11
def scaled_quantized_forward_pass(inputs, weights, scales = None, biases = None, hidden_activations='relu', output_activations = 'relu',
            quantization_method = 'herd', rng=None):
    """
    Return the activations from a forward pass of a ReLU net.
    :param inputs: A (n_frames, n_dims_in) array
    :param weights: A list of (n_dim_in, n_dim_out) weight matrices
    :param scales: An optional (len(weights)) list of scale factors applied to the activations before quantization (defaults to 1 per layer)
    :param biases: An optional (len(weights)) list of (w.shape[1]) biases for each weight matrix
    :param hidden_activations: Indicates the hidden layer activation function
    :param output_activations: Indicates the output layer activation function
    :param quantization_method: The method for quantizing (see function: sequential_quantize)
    :param rng: A random number generator or seed
    :return: activations:
        A len(weights)*3+1 list of (n_frames, n_dims) activations.
        Elements [::3] will be a length(w)+1 list containing the input to each rounding unit, and the final output
        Elements [1::3] will be the length(w) rounded "spike" signal.
        Elements [2::3] will be the length(w) inputs to each nonlinearity
    """
    rng = get_rng(rng)
    activations = [inputs]
    if biases is None:
        biases = [0]*len(weights)
    else:
        assert len(biases)==len(weights)

    if scales is None:
        scales = [1.]*len(weights)

    x = inputs  # (n_samples, n_units)
    for i, (w, b, k) in enumerate(izip_equal(weights, biases, scales)):
        s = quantize(x*k, method=quantization_method, rng=rng)
        u = (s/k).dot(w)+b
        x = activation_function(u, output_activations if i==len(weights)-1 else hidden_activations)
        activations += [s, u, x]
    return activations
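The core of the loop above is: scale the activations up by k, quantize, rescale, then apply the weights and nonlinearity. A minimal numpy-only sketch of one such layer step, with plain rounding standing in for the project's quantize function and ReLU for activation_function:

import numpy as np

rng = np.random.RandomState(0)
x = rng.rand(5, 8)          # (n_samples, n_units) layer input
w = 0.1 * rng.randn(8, 4)   # layer weights
b = np.zeros(4)
k = 16.                     # scale: larger k means finer quantization

s = np.round(x * k)         # quantized "spike" signal
u = (s / k).dot(w) + b      # pre-activation computed from the rescaled spikes
out = np.maximum(0, u)      # relu
print(round(float(np.abs(s / k - x).max()), 4))  # quantization error is at most 1/(2k) = 0.03125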
Example 12
def get_synthetic_deep_data(n_samples,
                            layer_sizes,
                            hidden_activations='softplus',
                            output_activation='linear',
                            normalize=True,
                            rng=1234):
    """
    Generate data from a randomly initialized neural network.
    :param n_samples: Number of samples to generate
    :param layer_sizes: Sizes of network layers
    :param hidden_activations: Hidden activation functions
    :param output_activation: Output activation function
    :param normalize: Normalize the output over samples (remove global mean, divide by std)
    :param rng: A random number generator or seed.
    :return: x, y
        x is an (n_samples, layer_sizes[0]) array
        y is a (n_samples, layer_sizes[-1]) array
    """
    rng = get_rng(rng)
    ws = initialize_network_params(layer_sizes=layer_sizes,
                                   mag='xavier-forward',
                                   include_biases=False,
                                   rng=rng)
    x = rng.randn(n_samples, layer_sizes[0])
    y = forward_pass(input_data=x,
                     weights=ws,
                     hidden_activations=hidden_activations,
                     output_activation=output_activation)
    if normalize:
        y = (y - y.mean(axis=0)) / y.std(axis=0)
    return x, y
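A numpy-only sketch of the same recipe (random weights, forward pass with softplus hidden units and a linear output, then normalization); forward_pass and initialize_network_params are project helpers not reproduced here, and the 'xavier-forward' magnitude sqrt(1/n_in) is inlined:

import numpy as np

def synthetic_deep_data(n_samples, layer_sizes, seed=1234):
    rng = np.random.RandomState(seed)
    ws = [np.sqrt(1. / n_in) * rng.randn(n_in, n_out)
          for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])]
    h = x = rng.randn(n_samples, layer_sizes[0])
    for i, w in enumerate(ws):
        u = h.dot(w)
        h = u if i == len(ws) - 1 else np.log1p(np.exp(u))  # linear output, softplus hidden
    y = (h - h.mean(axis=0)) / h.std(axis=0)
    return x, y

x, y = synthetic_deep_data(100, [20, 50, 3])
print(x.shape, y.shape)  # (100, 20) (100, 3)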
Example 13
 def __init__(self, kp, ki=0., kd=0., noise = 0., rng = None):
     self.kp = kp
     self.kd = kd
     self.ki = ki
     self.xp = 0
     self.s = 0
     self.noise = noise
     self.rng = get_rng(rng)
Example 14
 def __init__(self, kp, ki=0., kd=0., noise=0., rng=None):
     self.kp = kp
     self.kd = kd
     self.ki = ki
     self.xp = 0
     self.s = 0
     self.noise = noise
     self.rng = get_rng(rng)
Example 15
def stochastically_rounded_relu_forward_pass_guts(weights, input_data, n_steps, rng = None):
    rng = get_rng(rng)
    activation = np.round(n_steps*input_data + rng.uniform(-.5, .5, size=input_data.shape))/float(n_steps)
    activations = [activation]
    for w in weights:
        u = np.maximum(0, activation.dot(w))
        activation = np.round(u*n_steps + rng.uniform(-.5, .5, size=u.shape))/float(n_steps)
        activations.append(activation)
    return activations
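The rounding above is unbiased: adding uniform noise in (-.5, .5) before rounding makes the expected value of round(n*x + noise)/n equal to x. A small numpy check of that property (illustrative only):

import numpy as np

rng = np.random.RandomState(0)
x, n_steps, n_trials = 0.3, 4, 100000
samples = np.round(n_steps * x + rng.uniform(-.5, .5, size=n_trials)) / float(n_steps)
print(round(float(samples.mean()), 3))  # close to 0.3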
Example 16
def estimate_log_z(w, b_h, b_v, annealing_ratios, n_runs = 10, rng = None):
    """
    Use Annealed Importance Sampling
    (http://www.iro.umontreal.ca/~lisa/pointeurs/breuleux+bengio_nc2011.pdf)
    to estimate the probability of the test data given the RBM parameters.

    This code is a Pythonified version of Russ Salakhutdinov's Matlab code:
    http://www.utstat.toronto.edu/~rsalakhu/code_AIS/RBM_AIS.m

    NOTE: THIS CODE DOES NOT SEEM TO BE PRODUCING GOOD RESULTS (They don't match with exact numbers.  Not sure why!)
    Better option: Use the rbm_ais method from pylearn2 (from pylearn2.rbm_tools import rbm_ais)

    :param w: Weights (n_visible, n_hidden)
    :param b_h: Hidden biases (n_hidden)
    :param b_v: Visible biases (n_visible)
    :param annealing_ratios: A monotonically increasing vector from 0 to 1
    :param n_runs: Number of annealing chains to use.
    :param rng: Random Number generator
    :return:
    """
    assert annealing_ratios[0]==0 and annealing_ratios[-1]==1 and np.all(np.diff(annealing_ratios)>0)
    rng = get_rng(rng)
    n_visible, n_hidden = w.shape
    visbiases_base = np.zeros_like(b_v)
    neg_data = rng.rand(n_runs, n_visible) < sigm(visbiases_base)  # Collect
    logww = - neg_data.dot(visbiases_base) - n_hidden*np.log(2)
    w_h = neg_data.dot(w)+b_h
    bv_base = neg_data.dot(visbiases_base)
    bee_vee = bv_base
    for t, r in enumerate(annealing_ratios):
        exp_wh = np.exp(r*w_h)
        logww += (1-r)*bv_base + r*bee_vee + np.sum(np.log(1+exp_wh), axis =1)
        wake_hid_probs = exp_wh/(1+exp_wh)
        wake_hid_states = wake_hid_probs > rng.rand(*wake_hid_probs.shape)
        neg_vis_probs = sigm((1-r)*visbiases_base + r*(wake_hid_states.dot(w.T)+b_v))
        neg_vis_states = neg_vis_probs > rng.rand(*neg_vis_probs.shape)

        w_h = neg_vis_states.dot(w)+b_h
        bv_base = neg_vis_states.dot(visbiases_base)
        bee_vee = neg_vis_states.dot(b_v)

        exp_wh = np.exp(r*w_h)
        logww -= (1-r)*bv_base + r*bee_vee + np.sum(np.log(1+exp_wh), axis = 1)

    exp_wh = np.exp(w_h)
    logww += neg_data.dot(b_v) + np.sum(np.log(1+exp_wh), axis = 1)

    r_ais = logsumexp(logww) - np.log(n_runs)
    log_z_base = np.sum(np.log(1+np.exp(visbiases_base))) + n_hidden*np.log(2)
    log_z_est = r_ais + log_z_base
    aa = np.mean(logww)
    logstd_AIS = np.log(np.std(np.exp(logww-aa))) + aa - np.log(n_runs)/2
    logZZ_est_up = logsumexp([np.log(3)+logstd_AIS, r_ais], axis = 0) + log_z_base
    logZZ_est_down = logdiffexp([(np.log(3)+logstd_AIS), r_ais], axis = 0) + log_z_base
    return log_z_est, (logZZ_est_up, logZZ_est_down)
Example 17
def initialize_conv_kernel(kernel_shape, mag = 'xavier', rng = None):
    rng = get_rng(rng)
    if mag=='xavier':
        n_kern_out, n_kern_in, k_size_y, k_size_x = kernel_shape
        fan_in = k_size_y*k_size_x*n_kern_in
        fan_out = n_kern_out*k_size_y*k_size_x
        mag = np.sqrt(2./(fan_in+fan_out))
    else:
        assert isinstance(mag, (int, float)), mag
    return mag*rng.randn(*kernel_shape)
Example 18
def initialize_conv_kernel(kernel_shape, mag='xavier', rng=None):
    rng = get_rng(rng)
    if mag == 'xavier':
        n_kern_out, n_kern_in, k_size_y, k_size_x = kernel_shape
        fan_in = k_size_y * k_size_x * n_kern_in
        fan_out = n_kern_out * k_size_y * k_size_x
        mag = np.sqrt(2. / (fan_in + fan_out))
    else:
        assert isinstance(mag, (int, float)), mag
    return mag * rng.randn(*kernel_shape)
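For comparison, a self-contained version of the Xavier fan computation for a conv kernel, under the assumption (used in the fix above) that the fan-out counts n_kern_out * k_size_y * k_size_x connections:

import numpy as np

def xavier_conv_kernel(kernel_shape, rng=np.random):
    n_out_maps, n_in_maps, ky, kx = kernel_shape
    fan_in = n_in_maps * ky * kx     # inputs feeding each output unit
    fan_out = n_out_maps * ky * kx   # outputs fed by each input unit
    return np.sqrt(2. / (fan_in + fan_out)) * rng.randn(*kernel_shape)

k = xavier_conv_kernel((32, 3, 5, 5))
print(k.shape, round(float(k.std()), 4))  # std ~ sqrt(2 / (75 + 800)) = 0.0478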
Example 19
 def __init__(self,
              precision=1.,
              threshold=0.5,
              values=None,
              phi_init=0,
              rng=None):
     assert values is None or len(values) == 2
     self.rng = get_rng(rng)
     self.phi = phi_init
     self.precision = precision
     self.threshold = threshold
     self.values = values
Example 20
def demo_variational_autoencoder(minibatch_size=100,
                                 n_epochs=2000,
                                 plot_interval=100,
                                 seed=None):
    """
    Train a Variational Autoencoder on MNIST and look at the samples it generates.
    :param minibatch_size: Number of elements in the minibatch
    :param n_epochs: Number of passes through dataset
    :param plot_interval: Plot every x iterations
    :param seed: A random seed
    """

    data = get_mnist_dataset(flat=True).training_set.input

    if is_test_mode():
        n_epochs = 1
        minibatch_size = 10
        data = data[:100]

    rng = get_rng(seed)

    model = VariationalAutoencoder(pq_pair=EncoderDecoderNetworks(
        x_dim=data.shape[1],
        z_dim=20,
        encoder_hidden_sizes=[200],
        decoder_hidden_sizes=[200],
        w_init=lambda n_in, n_out: 0.01 * np.random.randn(n_in, n_out),
        x_distribution='bernoulli',
        z_distribution='gaussian',
        hidden_activation='softplus'),
                                   optimizer=AdaMax(alpha=0.003),
                                   rng=rng)

    training_fcn = model.train.compile()

    sampling_fcn = model.sample.compile()

    for i, minibatch in enumerate(
            minibatch_iterate(data,
                              minibatch_size=minibatch_size,
                              n_epochs=n_epochs)):

        training_fcn(minibatch)

        if i % plot_interval == 0:
            print('Epoch %s' % (i * minibatch_size / float(len(data)),))
            samples = sampling_fcn(25).reshape(5, 5, 28, 28)
            dbplot(samples, 'Samples from Model')
            dbplot(
                model.pq_pair.p_net.parameters[-2].get_value()[:25].reshape(
                    -1, 28, 28), 'dec')
            dbplot(
                model.pq_pair.q_net.parameters[0].get_value().T[:25].reshape(
                    -1, 28, 28), 'enc')
Example 21
def initialize_params(layer_sizes: Sequence[int],
                      initial_weight_scale=1.,
                      rng=None) -> Sequence[LayerParams]:
    rng = get_rng(rng)
    ws = [
        rng.uniform(low=-initial_weight_scale * (6. / (n_pre + n_post))**.5,
                    high=initial_weight_scale * (6. / (n_pre + n_post))**.5,
                    size=(n_pre, n_post))
        for n_pre, n_post in izip_equal(layer_sizes[:-1], layer_sizes[1:])
    ]
    bs = [torch.zeros(n_post) for n_post in layer_sizes[1:]]
    return _params_vals_to_params(ws, bs)
Example 22
 def from_initializer(cls,
                      n_in,
                      n_out,
                      w_init_mag=0.01,
                      rng=None,
                      **kwargs):
     rng = get_rng(rng)
     return cls(w=w_init_mag * rng.randn(n_in, n_out),
                b=np.zeros(n_out),
                w_rev=w_init_mag * rng.randn(n_out, n_in),
                b_rev=np.zeros(n_in),
                rng=rng,
                **kwargs)
Example 23
def get_vgg_video_splice(video_identifiers, shuffle=False, shuffling_rng=None):

    videos = np.concatenate([
        load_ilsvrc_video(identifier, size=(224, 224))
        for identifier in video_identifiers
    ])
    vgg_mode_videos = im2vgginput(videos)

    if shuffle:
        rng = get_rng(shuffling_rng)
        rng.shuffle(vgg_mode_videos)

    return videos, vgg_mode_videos
Example 24
def discretize(x, approach='noisy-round', scale = 1, rng = None):

    rng = get_rng(rng)
    if approach == 'noisy-round':
        return np.round(x*scale + rng.uniform(low=-.5, high=.5, size=x.shape))/scale
    elif approach == 'round':
        return np.round(x*scale)/scale
    elif approach == 'noisy-add':
        return x + rng.uniform(-.5, .5, size=x.shape)/scale
    elif approach == 'surrogate-noise':
        return x + (12**.5)*((x%1)-(x%1)**2)*rng.uniform(low=-.5, high=.5, size=x.shape)/scale
    else:
        raise Exception('No discretization approach: %s' % approach)
Example 25
def stochastically_rounded_relu_forward_pass_guts(weights,
                                                  input_data,
                                                  n_steps,
                                                  rng=None):
    rng = get_rng(rng)
    activation = np.round(n_steps * input_data + rng.uniform(
        -.5, .5, size=input_data.shape)) / float(n_steps)
    activations = [activation]
    for w in weights:
        u = np.maximum(0, activation.dot(w))
        activation = np.round(
            u * n_steps + rng.uniform(-.5, .5, size=u.shape)) / float(n_steps)
        activations.append(activation)
    return activations
Example 26
def lowpass_random(n_samples, cutoff, n_dim=None, rng = None, normalize = False, slope=0):
    """
    Return a random lowpass-filtered signal.
    :param n_samples: Number of samples in the signal
    :param cutoff: Cutoff frequency, in the range 0 (pure DC) to 1 (sample frequency)
    :param n_dim: Optional number of dimensions (None gives a 1-D signal)
    :param rng: A random number generator or seed
    :param normalize: If True, divide the signal by its standard deviation
    :param slope: Slope of an optional linear ramp added to the signal
    :return: A (n_samples,) or (n_samples, n_dim) lowpass-filtered random signal
    """
    rng = get_rng(rng)
    assert 0<=cutoff<=1, "Cutoff must be in the range 0 (pure DC) to 1 (sample frequency)"
    base_signal = rng.randn(n_samples) if n_dim is None else rng.randn(n_samples, n_dim)
    lowpass_signal = lowpass(base_signal, cutoff)
    if normalize:
        lowpass_signal = lowpass_signal/np.std(lowpass_signal)
    if slope != 0:
        ramp = slope*np.arange(len(lowpass_signal))
        lowpass_signal = lowpass_signal+(ramp if n_dim is None else ramp[:, None])
    return lowpass_signal
Example 27
 def from_init(cls,
               n_hidden,
               n_out,
               b_h=0,
               rng=None,
               scale=.1,
               symmetric=False,
               **kwargs):
     rng = get_rng(rng)
     w_hh = scale * initialize_weight_matrix(
         n_in=n_hidden, n_out=n_hidden, rng=rng)
     if symmetric:
         w_hh = .5 * (w_hh + w_hh.T)
     w_hx = scale * initialize_weight_matrix(
         n_in=n_hidden, n_out=n_out, rng=rng)
     w_xh = w_hx.T if symmetric else scale * initialize_weight_matrix(
         n_in=n_out, n_out=n_hidden, rng=rng)
     return Network(w_hh=w_hh, w_hx=w_hx, w_xh=w_xh, b_h=b_h, **kwargs)
Example 28
 def from_init(specifiers,
               input_shape,
               w_init=0.01,
               force_shared_parameters=True,
               rng=None):
     """
     Convenient initialization function.
     :param specifiers:
     :param input_shape:
     :param w_init:
     :param force_shared_parameters: Use shared parameters for conv layer (allows training).
     :param rng:
     :return:
     """
     rng = get_rng(rng)
     n_maps, n_rows, n_cols = input_shape
     layers = OrderedDict()
     if isinstance(specifiers, (list, tuple)):
         specifiers = OrderedDict(enumerate(specifiers))
      for spec_name, spec in specifiers.items():
         if isinstance(spec, ConvInitSpec):
             spec = ConvolverSpec(
                 w=w_init *
                 rng.randn(spec.n_maps, n_maps, spec.filter_size[0],
                           spec.filter_size[1]),
                 b=np.zeros(spec.n_maps) if spec.use_bias else False,
                 mode=spec.mode)
         if isinstance(spec, ConvolverSpec):
             n_maps = spec.w.shape[0]
             if spec.mode == 'valid':
                 n_rows += -spec.w.shape[2] + 1
                 n_cols += -spec.w.shape[3] + 1
             elif isinstance(spec.mode, int):
                 n_rows += -spec.w.shape[2] + 1 + spec.mode * 2
                 n_cols += -spec.w.shape[3] + 1 + spec.mode * 2
         elif isinstance(spec, PoolerSpec):
              n_rows //= spec.region[0]
              n_cols //= spec.region[1]
         layers[spec_name] = specifier_to_layer(
             spec, force_shared_parameters=force_shared_parameters, rng=rng)
         LOGGER.info('Layer "%s" (%s) output shape: %s' %
                     (spec_name, spec.__class__.__name__,
                      (n_maps, n_rows, n_cols)))
     return ConvNet(layers)
Example 29
def scaled_quantized_forward_pass(inputs,
                                  weights,
                                  scales=None,
                                  biases=None,
                                  hidden_activations='relu',
                                  output_activations='relu',
                                  quantization_method='herd',
                                  rng=None):
    """
    Return the activations from a forward pass of a ReLU net.
    :param inputs: A (n_frames, n_dims_in) array
    :param weights: A list of (n_dim_in, n_dim_out) weight matrices
    :param scales: An optional (len(weights)) list of scale factors applied to the activations before quantization (defaults to 1 per layer)
    :param biases: An optional (len(weights)) list of (w.shape[1]) biases for each weight matrix
    :param hidden_activations: Indicates the hidden layer activation function
    :param output_activations: Indicates the output layer activation function
    :param quantization_method: The method for quantizing (see function: sequential_quantize)
    :param rng: A random number generator or seed
    :return: activations:
        A len(weights)*3+1 list of (n_frames, n_dims) activations.
        Elements [::3] will be a length(w)+1 list containing the input to each rounding unit, and the final output
        Elements [1::3] will be the length(w) rounded "spike" signal.
        Elements [2::3] will be the length(w) inputs to each nonlinearity
    """
    rng = get_rng(rng)
    activations = [inputs]
    if biases is None:
        biases = [0] * len(weights)
    else:
        assert len(biases) == len(weights)

    if scales is None:
        scales = [1.] * len(weights)

    x = inputs  # (n_samples, n_units)
    for i, (w, b, k) in enumerate(izip_equal(weights, biases, scales)):
        s = quantize(x * k, method=quantization_method, rng=rng)
        u = (s / k).dot(w) + b
        x = activation_function(u, output_activations if i == len(weights) - 1 else hidden_activations)
        activations += [s, u, x]
    return activations
Example 30
def get_synthetic_deep_data(n_samples, layer_sizes, hidden_activations='softplus', output_activation='linear', normalize = True, rng=1234):
    """
    Generate data from a randomly initialized neural network.
    :param n_samples: Number of samples to generate
    :param layer_sizes: Sizes of network layers
    :param hidden_activations: Hidden activation functions
    :param output_activation: Output activation function
    :param normalize: Normalize the output over samples (remove global mean, divide by std)
    :param rng: A random number generator or seed.
    :return: x, y
        x is an (n_samples, layer_sizes[0]) array
        y is a (n_samples, layer_sizes[-1]) array
    """
    rng = get_rng(rng)
    ws = initialize_network_params(layer_sizes=layer_sizes, mag = 'xavier-forward', include_biases=False, rng=rng)
    x = rng.randn(n_samples, layer_sizes[0])
    y = forward_pass(input_data=x, weights=ws, hidden_activations=hidden_activations, output_activation=output_activation)
    if normalize:
        y = (y - y.mean(axis=0))/y.std(axis=0)
    return x, y
Example 31
 def __init__(self,
              layers,
              optimizer,
              layerwise_scales=True,
              corruption_type='round',
              rng=None):
     """
      layers is an OrderedDict of callables.
     """
      assert layerwise_scales, 'Only layerwise scales are supported for now.'
     if isinstance(layers, (list, tuple)):
         layers = OrderedDict(enumerate(layers))
     else:
         assert isinstance(
             layers, OrderedDict
         ), "Layers must be presented as a list, tuple, or OrderedDict"
     self.layers = layers
     self.optimizer = optimizer
     self.layerwise_scales = layerwise_scales
     self.corruption_type = corruption_type
     self.rng = get_rng(rng)
Example 32
def proportional_random_assignment(length, split, rng):
    """
    Generate an integer array of the given length, with elements randomly assigned to 0...len(split), with
    frequency of elements with value i proportional to split[i].

    This is useful for splitting training/test sets.  e.g.

        n_samples = 1000
        x = np.random.randn(n_samples, 4)
        y = np.random.randn(n_samples)
        subsets = proportional_random_assignment(n_samples, split=0.7, rng=1234)
        x_train = x[subsets==0]
        y_train = y[subsets==0]
        x_test = x[subsets==1]
        y_test = y[subsets==1]

    :param length: The length of the output array
    :param split: Either a list of ratios to assign to each group (the ratios must sum to at most 1), or a single
        float in (0, 1), indicating that we split into 2 groups.
    :param rng: A random number generator or seed.
    :return: An integer array.
    """
    rng = get_rng(rng)
    if isinstance(split, float):
        split = [split]
    assert 0 <= np.sum(
        split
    ) <= 1, "The sum of elements in split: {} must be in [0, 1].  Got {}".format(
        split, np.sum(split))
    arr = np.zeros(length, dtype=int)
    cut_points = np.concatenate(
        [np.round(np.cumsum(split) * length).astype(int), [length]])
    scrambled_indices = rng.permutation(length)
    for i, (c_start, c_end) in enumerate(zip(cut_points[:-1], cut_points[1:])):
        arr[scrambled_indices[c_start:c_end]] = i + 1  # Note we skip zero since arrays are already inited to 0
    return arr
Example 33
def sequential_quantize(v, n_steps = None, method='herd', rng = None):
    """
    :param v: A (..., n_steps, n_units) array
    :param n_steps: The number of steps to spike for (must equal v.shape[-2] if provided)
    :param method: The quantization method: 'herd', 'herd2', 'round', 'slippery.9', 'slippery.5', 'randn', 'uniform', 'surrogate-noise', 'surrogate-sqrt', or None
    :param rng: A random number generator or seed
    :return: An (..., n_steps, n_units) array of quantized values
    """
    rng = get_rng(rng)
    assert v.ndim>=2
    if n_steps is None:
        n_steps = v.shape[-2]
    else:
        assert n_steps == v.shape[-2]

    if method=='herd':
        result = fixed_diff(np.round(np.cumsum(v, axis=-2)), axis=-2)
    elif method=='herd2':
        result = fixed_diff(fixed_diff(np.round(np.cumsum(np.cumsum(v, axis=-2), axis=-2)), axis=-2), axis=-2)
    elif method=='round':
        result = np.round(v)
    elif method == 'slippery.9':
        result = slippery_round(v, slip=0.9)
    elif method == 'slippery.5':
        result = slippery_round(v, slip=0.5)
    elif method == 'randn':
        result = v + rng.randn(*v.shape)
    elif method=='uniform':
        result = v + rng.uniform(-.5, .5, size=v.shape)
    elif method=='surrogate-noise':
        result = v + (12**.5)*((v%1)-(v%1)**2)*rng.uniform(low=-.5, high=.5, size=v.shape)
    elif method == 'surrogate-sqrt':
        result = v + np.sqrt((12**.5)*((v%1)-(v%1)**2)*rng.uniform(low=-.5, high=.5, size=v.shape))
    elif method is None:
        result = v
    else:
        raise NotImplementedError("Don't have quantization method '%s' implemented" % (method, ))
    return result
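A minimal numpy sketch of the 'herd' branch above: round the running sum of the signal and difference it, so the cumulative quantization error stays bounded (fixed_diff is assumed to behave like np.diff with the first element kept):

import numpy as np

def herd_quantize(v):
    # Round the cumulative sum along the step axis, then difference it
    # (keeping the first rounded value) so the output's running total tracks v's.
    c = np.round(np.cumsum(v, axis=-2))
    return np.concatenate([c[..., :1, :], np.diff(c, axis=-2)], axis=-2)

v = np.full((8, 1), 0.25)        # constant 0.25 signal over 8 steps
print(herd_quantize(v).ravel())  # [0. 0. 1. 0. 0. 1. 0. 0.] - the mean stays 0.25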
Example 34
def experiment_mnist_eqprop(
    layer_constructor,
    n_epochs=10,
    hidden_sizes=(500, ),
    minibatch_size=20,
    beta=.5,
    random_flip_beta=True,
    learning_rate=.05,
    n_negative_steps=20,
    n_positive_steps=4,
    initial_weight_scale=1.,
    online_checkpoints_period=None,
    epoch_checkpoint_period=.25,
    skip_zero_epoch_test=False,
    n_test_samples=None,
    prop_direction: Union[str, Tuple] = 'neutral',
    bidirectional=True,
    renew_activations=True,
    do_fast_forward_pass=False,
    rebuild_coders=True,
    l2_loss=None,
    splitstream=True,
    seed=1234,
):
    """
    Replicate the results of Scellier & Bengio:
        Equilibrium Propagation: Bridging the Gap between Energy-Based Models and Backpropagation
        https://www.frontiersin.org/articles/10.3389/fncom.2017.00024/full

    Specifically, the train_model demo here:
        https://github.com/bscellier/Towards-a-Biologically-Plausible-Backprop

    Differences between our code and theirs:
    - We do not keep persistent layer activations tied to data points over epochs.  So our results should only really match for the first epoch.
    - We evaluate training score periodically, rather than online average (however you can see online score by setting online_checkpoints_period to something that is not None)
    """

    print('Params:\n' +
          '\n'.join(list(f'  {k} = {v}' for k, v in locals().items())))

    rng = get_rng(seed)
    n_in = 784
    n_out = 10
    dataset = get_mnist_dataset(flat=True, n_test_samples=None).to_onehot()
    x_train, y_train = dataset.training_set.xy
    x_test, y_test = dataset.test_set.xy  # Their 'validation set' is our 'test set'

    if is_test_mode():
        x_train, y_train, x_test, y_test = x_train[:100], y_train[:100], x_test[:100], y_test[:100]
        n_epochs = 1

    layer_sizes = [n_in] + list(hidden_sizes) + [n_out]

    rng = get_rng(rng)

    y_train = y_train.astype(np.float32)

    ra = RunningAverage()
    sp = Speedometer(mode='last')
    is_online_checkpoint = Checkpoints(
        online_checkpoints_period, skip_first=skip_zero_epoch_test
    ) if online_checkpoints_period is not None else lambda: False
    is_epoch_checkpoint = Checkpoints(epoch_checkpoint_period,
                                      skip_first=skip_zero_epoch_test)

    results = Duck()

    training_states = initialize_states(
        layer_constructor=layer_constructor,
        n_samples=minibatch_size,
        params=initialize_params(layer_sizes=layer_sizes,
                                 initial_weight_scale=initial_weight_scale,
                                 rng=rng))

    if isinstance(prop_direction, str):
        fwd_prop_direction, backward_prop_direction = prop_direction, prop_direction
    else:
        fwd_prop_direction, backward_prop_direction = prop_direction

    for i, (ixs, info) in enumerate(
            minibatch_index_info_generator(n_samples=x_train.shape[0],
                                           minibatch_size=minibatch_size,
                                           n_epochs=n_epochs)):
        epoch = i * minibatch_size / x_train.shape[0]

        if is_epoch_checkpoint(epoch):
            n_samples = n_test_samples if n_test_samples is not None else len(
                x_test)
            y_pred_test, y_pred_train = [
                run_inference(
                    x_data=x[:n_test_samples],
                    states=initialize_states(
                        layer_constructor=layer_constructor,
                        params=[s.params for s in training_states],
                        n_samples=min(len(x), n_test_samples)
                        if n_test_samples is not None else len(x)),
                    n_steps=n_negative_steps,
                    prop_direction=fwd_prop_direction,
                ) for x in (x_test, x_train)
            ]
            # y_pred_train = run_inference(x_data=x_train[:n_test_samples], states=initialize_states(params=[s.params for s in training_states], n_samples=min(len(x_train), n_test_samples) if n_test_samples is not None else len(x_train)))
            test_error = percent_argmax_incorrect(y_pred_test,
                                                  y_test[:n_test_samples])
            train_error = percent_argmax_incorrect(y_pred_train,
                                                   y_train[:n_test_samples])
            print(
                f'Epoch: {epoch:.3g}, Iter: {i}, Test Error: {test_error:.3g}%, Train Error: {train_error:.3g}%, Mean Rate: {sp(i):.3g}iter/s'
            )
            results[next, :] = dict(iter=i,
                                    epoch=epoch,
                                    train_error=train_error,
                                    test_error=test_error)
            yield results
            if epoch > 2 and train_error > 50:
                return

        # The Original training loop, just taken out here:
        x_data_sample, y_data_sample = x_train[ixs], y_train[ixs]
        training_states = run_eqprop_training_update(
            x_data=x_data_sample,
            y_data=y_data_sample,
            layer_states=training_states,
            beta=beta,
            random_flip_beta=random_flip_beta,
            learning_rate=learning_rate,
            layer_constructor=layer_constructor,
            bidirectional=bidirectional,
            l2_loss=l2_loss,
            renew_activations=renew_activations,
            n_negative_steps=n_negative_steps,
            n_positive_steps=n_positive_steps,
            prop_direction=prop_direction,
            splitstream=splitstream,
            rng=rng)
        this_train_score = ra(
            percent_argmax_correct(output_from_state(training_states),
                                   y_train[ixs]))
        if is_online_checkpoint():
            print(
                f'Epoch {epoch:.3g}: Iter {i}: Score {this_train_score:.3g}%: Mean Rate: {sp(i):.2g}'
            )
Example 35
 def init_state(self, minibatch_size, rng=None):
     rng = get_rng(rng)
     n_hidden, n_out = self.w_hx.shape
     return NetworkState(h=rng.randn(minibatch_size, n_hidden),
                         x=rng.randn(minibatch_size, n_out))
Example 36
def demo_quantized_convergence(
        quantized_layer_constructor,
        smooth_epsilon=0.5,
        layer_sizes=(500, 500, 10),
        initialize_acts_randomly=False,
        minibatch_size=1,
        # n_steps = 100,
        n_steps=10000,
        initial_weight_scale=1.,
        prop_direction='neutral',
        data_seed=1241,
        param_seed=1237,
        hang=True,
        plot=False):
    """
    """

    smooth_layer_constructor = SimpleLayerController.get_partial_constructor(
        epsilon=smooth_epsilon)

    print('Params:\n' +
          '\n'.join(list(f'  {k} = {v}' for k, v in locals().items())))

    data_rng = get_rng(data_seed)
    param_rng = get_rng(param_seed)

    HISTORY_LEN = n_steps
    N_NEURONS_TO_PLOT = 10

    if is_test_mode():
        n_steps = 10

    pi = ProgressIndicator(update_every='2s', expected_iterations=2 * n_steps)
    n_in, n_out = layer_sizes[0], layer_sizes[-1]

    x_data = data_rng.rand(minibatch_size, n_in)

    params = initialize_params(layer_sizes=layer_sizes,
                               initial_weight_scale=initial_weight_scale,
                               rng=param_rng)

    def run_update(layer_constructor, mode):

        plt.gca().set_prop_cycle(None)

        states = initialize_states(layer_constructor=layer_constructor,
                                   n_samples=minibatch_size,
                                   params=params)

        for t in range(n_steps):

            states = eqprop_step(layer_states=states,
                                 x_data=x_data,
                                 beta=0,
                                 y_data=None,
                                 direction=prop_direction)
            acts = [s.potential for s in states]
            yield acts
            if plot:
                dbplot_collection(
                    [a[0, :N_NEURONS_TO_PLOT] for a in acts],
                    f'{mode} acts',
                    axis='acts',
                    draw_every='5s',
                    cornertext=f'Negative Phase: {t}',
                    plot_type=lambda: MovingPointPlot(
                        buffer_len=HISTORY_LEN,
                        plot_kwargs=dict(linestyle='-.'
                                         if mode == 'Smooth' else '-'),
                        reset_color_cycle=True))
                # dbplot_collection([a[0, :N_NEURONS_TO_PLOT] for a in acts], f'{mode} acts', axis='acts', draw_every=1, cornertext=f'Negative Phase: {t}', plot_type = lambda: MovingPointPlot(buffer_len=HISTORY_LEN, plot_kwargs=dict(linestyle = '-.' if mode=='Smooth' else '-'), reset_color_cycle=True))
            pi()

    smooth_record = list(
        run_update(layer_constructor=smooth_layer_constructor, mode='Smooth'))
    smooth_acts = smooth_record[-1]

    rough_record = list(
        run_update(layer_constructor=quantized_layer_constructor,
                   mode='Rough'))
    rough_acts = rough_record[-1]

    rs_online_errors = np.array(
        [[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)]
         for hs_rough, hs_smooth in zip(rough_record, smooth_record)])
    rs_end_errors = np.array(
        [[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)]
         for hs_smooth in [smooth_record[-1]] for hs_rough in rough_record])
    rr_end_errors = np.array(
        [[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)]
         for hs_smooth in [rough_record[-1]] for hs_rough in rough_record])
    ss_end_errors = np.array(
        [[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)]
         for hs_smooth in [smooth_record[-1]] for hs_rough in smooth_record])

    mean_abs_error = np.mean(rs_online_errors, axis=0)
    final_abs_error = rs_online_errors[-1]
    print(
        f'Mean Abs Layerwise Errors: {np.array_str(mean_abs_error, precision=5)}\t Final Layerwise Errors: {np.array_str(final_abs_error,  precision=5)}'
    )

    return rs_online_errors, rs_end_errors, rr_end_errors, ss_end_errors
Example 37
def experiment_mnist_eqprop_torch(
    layer_constructor: Callable[[int, LayerParams], IDynamicLayer],
    n_epochs=10,
    hidden_sizes=(500, ),
    minibatch_size=10,  # update mini-batch size
    batch_size=500,  # total batch size
    beta=.5,
    random_flip_beta=True,
    learning_rate=.05,
    n_negative_steps=120,
    n_positive_steps=80,
    initial_weight_scale=1.,
    online_checkpoints_period=None,
    epoch_checkpoint_period=1.0,  #'100s', #{0: .25, 1: .5, 5: 1, 10: 2, 50: 4},
    skip_zero_epoch_test=False,
    n_test_samples=10000,
    prop_direction: Union[str, Tuple] = 'neutral',
    bidirectional=True,
    renew_activations=True,
    do_fast_forward_pass=False,
    rebuild_coders=True,
    l2_loss=None,
    splitstream=False,
    seed=1234,
    prediction_inp_size=17,  ## prediction input size
    delay=18,  ## delay size for the clamped phase
    pred=True,  ## if you want to use the prediction
    check_flg=False,
):
    """
    Replicate the results of Scellier & Bengio:
        Equilibrium Propagation: Bridging the Gap between Energy-Based Models and Backpropagation
        https://www.frontiersin.org/articles/10.3389/fncom.2017.00024/full

    Specifically, the train_model demo here:
        https://github.com/bscellier/Towards-a-Biologically-Plausible-Backprop

    Differences between our code and theirs:
    - We do not keep persistent layer activations tied to data points over epochs.  So our results should only really match for the first epoch.
    - We evaluate training score periodically, rather than online average (however you can see online score by setting online_checkpoints_period to something that is not None)
    """
    torch.manual_seed(seed)
    device = 'cuda' if torch.cuda.is_available() and USE_CUDA_WHEN_AVAILABLE else 'cpu'
    if device == 'cuda':
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    print(f'Using Device: {device}')

    print('Params:\n' +
          '\n'.join(list(f'  {k} = {v}' for k, v in locals().items())))

    rng = get_rng(seed)
    n_in = 784
    n_out = 10

    dataset = input_data.read_data_sets('MNIST_data', one_hot=True)

    x_train, y_train = (torch.tensor(dataset.train.images, dtype=torch.float32).to(device),
                        torch.tensor(dataset.train.labels, dtype=torch.float32).to(device))
    x_test, y_test = (torch.tensor(dataset.test.images, dtype=torch.float32).to(device),
                      torch.tensor(dataset.test.labels, dtype=torch.float32).to(device))  # Their 'validation set' is our 'test set'
    x_val, y_val = (torch.tensor(dataset.validation.images, dtype=torch.float32).to(device),
                    torch.tensor(dataset.validation.labels, dtype=torch.float32).to(device))

    if is_test_mode():
        x_train, y_train, x_test, y_test, x_val, y_val = (
            x_train[:100], y_train[:100], x_test[:100], y_test[:100], x_val[:100], y_val[:100])
        n_epochs = 1
        n_negative_steps = 3
        n_positive_steps = 3

    layer_sizes = [n_in] + list(hidden_sizes) + [n_out]

    ra = RunningAverage()
    sp = Speedometer(mode='last')
    is_online_checkpoint = Checkpoints(
        online_checkpoints_period, skip_first=skip_zero_epoch_test
    ) if online_checkpoints_period is not None else lambda: False
    is_epoch_checkpoint = Checkpoints(epoch_checkpoint_period,
                                      skip_first=skip_zero_epoch_test)

    training_states = initialize_states(
        layer_constructor=layer_constructor,
        #n_samples=minibatch_size,
        n_samples=batch_size,
        params=initialize_params(layer_sizes=layer_sizes,
                                 initial_weight_scale=initial_weight_scale,
                                 rng=rng))

    # dbplot(training_states[0].params.w_fore[:10, :10], str(rng.randint(265)))

    if isinstance(prop_direction, str):
        fwd_prop_direction, backward_prop_direction = prop_direction, prop_direction
    else:
        fwd_prop_direction, backward_prop_direction = prop_direction

    def do_test():
        # n_samples = n_test_samples if n_test_samples is not None else len(x_test)
        test_error, train_error, val_error = [
            percent_argmax_incorrect(
                run_inference(
                    x_data=x[:n_test_samples],
                    states=initialize_states(
                        layer_constructor=layer_constructor,
                        params=[s.params for s in training_states],
                        n_samples=n_samples),
                    n_steps=n_negative_steps,
                    prop_direction=fwd_prop_direction,
                ), y[:n_samples]).item()
            for x, y in [(x_test, y_test), (x_train, y_train), (x_val, y_val)]
            for n_samples in [
                min(len(x), n_test_samples
                    ) if n_test_samples is not None else len(x)
            ]
        ]  # Not an actual loop... just a hack for assignment in comprehensions
        print(
            f'Epoch: {epoch:.3g}, Iter: {i}, Test Error: {test_error:.3g}%, Train Error: {train_error:.3g}%, Validation Error: {val_error:.3g}%, Mean Rate: {sp(i):.3g}iter/s'
        )

        return dict(iter=i,
                    epoch=epoch,
                    train_error=train_error,
                    test_error=test_error,
                    val_error=val_error), train_error, test_error, val_error

    results = Duck()
    pi = ProgressIndicator(expected_iterations=n_epochs *
                           dataset.train.num_examples / minibatch_size,
                           update_every='10s')

    dy_squared = []
    dy_squared.append(None)
    dy_squared.append(None)
    for i, (ixs, info) in enumerate(
            minibatch_index_info_generator(n_samples=x_train.size()[0],
                                           minibatch_size=batch_size,
                                           n_epochs=n_epochs)):
        epoch = i * batch_size / x_train.shape[0]

        if is_epoch_checkpoint(epoch):
            check_flg = False
            x_train, y_train = shuffle_data(x_train, y_train)
            with pi.pause_measurement():
                results[next, :], train_err, test_err, val_err = do_test()

                ## prepare for saving the parameters
                ws, bs = zip(*((s.params.w_aft, s.params.b)
                               for s in training_states[1:]))

                f = None
                if os.path.isfile(directory + '/log.txt'):
                    f = open(directory + '/log.txt', 'a')
                else:
                    os.mkdir(directory)
                    f = open(directory + '/log.txt', 'w')

                f.write("Epoch: " + str(epoch) + '\n')
                f.write("accuracy for training: " + str(train_err) + '\n')
                f.write("accuracy for testing: " + str(test_err) + '\n')
                f.write("accuracy for validation: " + str(val_err) + '\n')

                f.close()

                np.save(directory + '/w_epoch_' + str(epoch) + '.npy', ws)
                np.save(directory + '/b_epoch_' + str(epoch) + '.npy', bs)
                np.save(directory + '/dy_squared_epoch_' + str(epoch) + '.npy',
                        dy_squared)

                yield results
                if epoch > 100 and results[-1, 'train_error'] > 50:
                    return

        # The Original training loop, just taken out here:
        ixs = ixs.astype(np.int32)  # this is for python version 3.7

        x_data_sample, y_data_sample = x_train[ixs], y_train[ixs]

        training_states, dy_squared = run_eqprop_training_update(
            x_data=x_data_sample,
            y_data=y_data_sample,
            layer_states=training_states,
            beta=beta,
            random_flip_beta=random_flip_beta,
            learning_rate=learning_rate,
            layer_constructor=layer_constructor,
            bidirectional=bidirectional,
            l2_loss=l2_loss,
            renew_activations=renew_activations,
            n_negative_steps=n_negative_steps,
            n_positive_steps=n_positive_steps,
            prop_direction=prop_direction,
            splitstream=splitstream,
            rng=rng,
            prediction_inp_size=prediction_inp_size,
            delay=delay,
            device=device,
            epoch_check=check_flg,
            epoch=epoch,
            pred=pred,
            batch_size=batch_size,
            minibatch_size=minibatch_size,
            dy_squared=dy_squared)
        check_flg = False

        this_train_score = ra(
            percent_argmax_incorrect(output_from_state(training_states),
                                     y_train[ixs]))
        if is_online_checkpoint():
            print(
                f'Epoch {epoch:.3g}: Iter {i}: Score {this_train_score:.3g}%: Mean Rate: {sp(i):.2g}'
            )

        pi.print_update(info=f'Epoch: {epoch}')

    results[next, :], train_err, test_err, val_err = do_test()
    yield results
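
The do_test closure above leans on a list comprehension purely to get assignment-like behaviour. For comparison, here is a minimal sketch of the same evaluation written as an ordinary helper; the name evaluate_error is illustrative, while run_inference, initialize_states and percent_argmax_incorrect are the helpers already used above.

def evaluate_error(x, y, training_states, layer_constructor, n_steps,
                   prop_direction, n_test_samples=None):
    # Percent-incorrect on at most n_test_samples examples, using freshly initialized states.
    n_samples = len(x) if n_test_samples is None else min(len(x), n_test_samples)
    states = initialize_states(layer_constructor=layer_constructor,
                               params=[s.params for s in training_states],
                               n_samples=n_samples)
    out = run_inference(x_data=x[:n_samples], states=states,
                        n_steps=n_steps, prop_direction=prop_direction)
    return percent_argmax_incorrect(out, y[:n_samples]).item()

# e.g.:
# test_error = evaluate_error(x_test, y_test, training_states, layer_constructor,
#                             n_steps=n_negative_steps, prop_direction=fwd_prop_direction,
#                             n_test_samples=n_test_samples)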
Esempio n. 38
0
def run_eqprop_training_update(x_data,
                               y_data,
                               layer_states: Sequence[IDynamicLayer],
                               beta: float,
                               random_flip_beta: bool,
                               learning_rate: float,
                               n_negative_steps: int,
                               n_positive_steps: int,
                               layer_constructor: Optional[Callable[
                                   [int, LayerParams], IDynamicLayer]] = None,
                               bidirectional: bool = True,
                               l2_loss: Optional[float] = None,
                               renew_activations: bool = True,
                               prop_direction=PropDirectionOptions.NEUTRAL,
                               splitstream=False,
                               rng=None) -> Sequence[IDynamicLayer]:

    if isinstance(prop_direction, (list, tuple)):
        negative_prop_direction, positive_prop_direction = prop_direction
    else:
        negative_prop_direction, positive_prop_direction = prop_direction, prop_direction

    rng = get_rng(rng)
    this_beta = beta * (torch.randint(2, size=()).float() * 2 -
                        1) if random_flip_beta else beta
    negative_states = last(
        run_negative_phase(x_data=x_data,
                           layer_states=layer_states,
                           n_steps=n_negative_steps,
                           prop_direction=negative_prop_direction))
    positive_states = last(
        run_positive_phase(x_data=x_data,
                           layer_states=negative_states,
                           beta=this_beta,
                           y_data=y_data,
                           n_steps=n_positive_steps,
                           prop_direction=positive_prop_direction))
    if splitstream:
        negative_states = last(
            run_negative_phase(x_data=x_data,
                               layer_states=negative_states,
                               n_steps=n_positive_steps,
                               prop_direction=positive_prop_direction))

    ws, bs = zip(*((s.params.w_aft, s.params.b) for s in layer_states[1:]))
    neg_acts, pos_acts = [[ls.potential for ls in layer_state]
                          for layer_state in (negative_states, positive_states)]
    new_ws, new_bs = eqprop_update(negative_acts=neg_acts,
                                   positive_acts=pos_acts,
                                   ws=ws,
                                   bs=bs,
                                   learning_rate=learning_rate,
                                   beta=this_beta,
                                   bidirectional=bidirectional,
                                   l2_loss=l2_loss)
    new_params = _params_vals_to_params(new_ws, new_bs)
    if renew_activations:
        assert layer_constructor is not None, 'If renew_activations is True, you must provide a layer_constructor.'
        new_states = initialize_states(n_samples=x_data.shape[0],
                                       params=new_params,
                                       layer_constructor=layer_constructor)
    else:
        new_states = [
            dataclasses.replace(s, params=p)
            for s, p in izip_equal(positive_states, new_params)
        ]
    return new_states
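
eqprop_update itself is not reproduced in these examples. The canonical Equilibrium Propagation rule contrasts co-activations of the nudged (positive) and free (negative) phases and scales by 1/beta. Below is a generic sketch of that rule, operating on the per-layer potentials exactly as they are passed above; it is the textbook update, not necessarily this library's implementation.

import torch

def eqprop_update_sketch(negative_acts, positive_acts, ws, bs, learning_rate, beta):
    # For each pair of adjacent layers: dW ~ (1/beta) * (pre+^T post+ - pre-^T post-),
    # averaged over the batch; biases use the difference of post-synaptic activities.
    new_ws, new_bs = [], []
    for w, b, pre_n, post_n, pre_p, post_p in zip(
            ws, bs, negative_acts[:-1], negative_acts[1:],
            positive_acts[:-1], positive_acts[1:]):
        n = pre_n.shape[0]  # batch size
        dw = (pre_p.t() @ post_p - pre_n.t() @ post_n) / (beta * n)
        db = (post_p - post_n).mean(dim=0) / beta
        new_ws.append(w + learning_rate * dw)
        new_bs.append(b + learning_rate * db)
    return new_ws, new_bs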
Esempio n. 39
0
def run_eqprop_training_update(x_data,
                               y_data,
                               layer_states: Sequence[IDynamicLayer],
                               beta: float,
                               random_flip_beta: bool,
                               learning_rate: float,
                               n_negative_steps: int,
                               n_positive_steps: int,
                               layer_constructor: Optional[Callable[
                                   [int, LayerParams], IDynamicLayer]] = None,
                               bidirectional: bool = True,
                               l2_loss: Optional[float] = None,
                               renew_activations: bool = True,
                               prop_direction=PropDirectionOptions.NEUTRAL,
                               splitstream=False,
                               rng=None,
                               prediction_inp_size=None,
                               delay=None,
                               device='cpu',
                               epoch_check=False,
                               epoch=None,
                               pred=False,
                               batch_size=500,
                               minibatch_size=20,
                               dy_squared=None) -> 'Tuple[Sequence[IDynamicLayer], list]':

    if isinstance(prop_direction, (list, tuple)):
        negative_prop_direction, positive_prop_direction = prop_direction
    else:
        negative_prop_direction, positive_prop_direction = prop_direction, prop_direction

    rng = get_rng(rng)
    this_beta = beta * (torch.randint(2, size=()).float() * 2 -
                        1) if random_flip_beta else beta

    ## Randomly chosen batch indices whose prediction- and clamped-phase data are used for the weight update
    update_data_idx = np.random.choice(batch_size,
                                       size=minibatch_size,
                                       replace=False)

    ## The remaining indices are used to fit the least-squares (LS) model that predicts the activations
    train_ls_idx = [k for k in range(batch_size) if k not in update_data_idx]

    if pred:
        all_negative_states = run_negative_phase(
            x_data=x_data,
            layer_states=layer_states,
            n_steps=n_negative_steps,
            prop_direction=negative_prop_direction)
        negative_activations, negative_target_activations, negative_states, all_potential = get_all_potaintial(
            all_negative_states, prediction_inp_size, n_negative_steps,
            layer_states)
    else:
        negative_states = last(
            run_negative_phase(x_data=x_data,
                               layer_states=layer_states,
                               n_steps=n_negative_steps,
                               prop_direction=negative_prop_direction))

    #positive_states = last(run_positive_phase(x_data=x_data, layer_states=negative_states, beta=this_beta, delay=delay, y_data=y_data, n_steps=n_positive_steps, prop_direction=positive_prop_direction))
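    # Note: unlike the commented-out call above, this positive phase starts from the fresh
    # layer_states rather than from the relaxed negative_states.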

    positive_states = last(
        run_positive_phase(x_data=x_data,
                           layer_states=layer_states,
                           beta=this_beta,
                           delay=delay,
                           y_data=y_data,
                           n_steps=n_positive_steps,
                           prop_direction=positive_prop_direction))
    if splitstream:
        negative_states = last(
            run_negative_phase(x_data=x_data,
                               layer_states=negative_states,
                               n_steps=n_positive_steps,
                               prop_direction=positive_prop_direction))

    ws, bs = zip(*((s.params.w_aft, s.params.b) for s in layer_states[1:]))

    if pred:

        _, pos_acts = [[ls.potential for ls in layer_state]
                       for layer_state in (negative_states, positive_states)]

        pos_acts = [pos_act[update_data_idx, :] for pos_act in pos_acts]

        neg_act_layer_1 = negative_activations[1]
        neg_act_layer_2 = negative_activations[2]

        negative_activations[1] = neg_act_layer_1[:, :, ::2]
        negative_activations[2] = neg_act_layer_2[:, :, ::2]
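        # Keep every second recorded time step (last axis) of layers 1 and 2 before fitting the predictor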
        #print(np.shape(negative_activations[1]))
        # linear regression prediction
        neg_acts = predict_dynamics(negative_activations,
                                    negative_target_activations, layer_states,
                                    device, update_data_idx, train_ls_idx)
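        # The predicted activations replace the fully relaxed negative-phase potentials in the update below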

        #print(neg_acts[1])
#      if epoch > 1.0:
#        np.save('C:/Users/yoshi/work/01_python/01_bioplausible/spiking-eqprop/spiking_eqprop/with_delay19_inp18_pred_spike_ada_lr00030002_b500_m10/negative_states_epoch_' + str(epoch) + '.npy', negative_activations)
    else:
        neg_acts, pos_acts = [[ls.potential for ls in layer_state]
                              for layer_state in (negative_states,
                                                  positive_states)]

    new_ws, new_bs, dy_squared = eqprop_update(negative_acts=neg_acts,
                                               positive_acts=pos_acts,
                                               ws=ws,
                                               bs=bs,
                                               learning_rate=learning_rate,
                                               beta=this_beta,
                                               bidirectional=bidirectional,
                                               l2_loss=l2_loss,
                                               dy_squared=dy_squared)
    new_params = _params_vals_to_params(new_ws, new_bs)
    if renew_activations:
        assert layer_constructor is not None, 'If renew_activations is True, you must provide a layer_constructor.'
        new_states = initialize_states(n_samples=x_data.shape[0],
                                       params=new_params,
                                       layer_constructor=layer_constructor)
    else:
        new_states = [
            dataclasses.replace(s, params=p)
            for s, p in izip_equal(positive_states, new_params)
        ]
    return new_states, dy_squared
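
This second variant threads dy_squared through eqprop_update and receives an updated accumulator back, which suggests an Adagrad-style adaptive step size (accumulate squared updates, divide each step by their square root). The internals of eqprop_update are not shown in these examples, so the following is only a generic sketch of that pattern under that assumption; the name adagrad_scaled_step is illustrative.

import torch

def adagrad_scaled_step(param, dy, accum, learning_rate, eps=1e-8):
    # Accumulate squared updates and scale the step by 1/(sqrt(accumulated) + eps).
    accum = dy ** 2 if accum is None else accum + dy ** 2
    new_param = param + learning_rate * dy / (torch.sqrt(accum) + eps)
    return new_param, accum

# Under this reading, the None entries placed in dy_squared before the training loop
# simply mean "no accumulated history yet" on the first update.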