def from_init(cls, w_init, layer_sizes, rng=None, last_layer_zero=False, **init_args):
    """
    :param w_init: Can be:
        - A scalar, in which case w_init will be interpreted as the standard deviation for the Normally
          distributed initial weights.
        - A function which accepts the shape of the weight matrix as separate arguments.
    :param layer_sizes: A list of layer sizes, including the input layer
    :param rng: A random number generator or seed to use for drawing weights (only when w_init is a scalar)
    :param last_layer_zero: There is no need for the last layer to have initial weights.  If this is True,
        the weights of the last layer will all be zero.
    :param **init_args: See MultiLayerPerceptron constructor
    """
    if hasattr(w_init, '__call__'):
        assert rng is None, "If w_init is callable, the random number generator (rng) doesn't do anything, and shouldn't be specified."
    else:
        rng = get_rng(rng)
        w_init_mag = w_init
        w_init = lambda n_in, n_out: w_init_mag * rng.randn(n_in, n_out)
    weights = [w_init(n_in, n_out) for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])]
    if last_layer_zero:
        weights[-1][:] = 0
    return cls(weights=weights, **init_args)
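# Illustrative usage sketch (not from the repo): from_init is assumed here to be a classmethod of
# MultiLayerPerceptron (its docstring points at that constructor), and get_rng to accept an integer seed.
# Either a scalar standard deviation or a callable initializer can be passed as w_init.
mlp_a = MultiLayerPerceptron.from_init(w_init=0.01, layer_sizes=[784, 100, 10], rng=1234)
mlp_b = MultiLayerPerceptron.from_init(
    w_init=lambda n_in, n_out: np.random.randn(n_in, n_out) / np.sqrt(n_in),  # custom initializer
    layer_sizes=[784, 100, 10],
    last_layer_zero=True,
)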
def initialize_weight_matrix(n_in, n_out, mag='xavier', base_dist='normal', rng=None):
    """
    Initialize a weight matrix
    :param n_in: Number of input units
    :param n_out: Number of output units
    :param mag: The magnitude, or a string identifying how to calculate the magnitude.  String options can be:
        'xavier-forward': Best for preserving the variance of a linear, tanh, or sigmoidal network across layers.
        'xavier-both': A compromise between preserving the variance of the forward and backward passes.
        'xavier-relu': Best for preserving variance on the forward pass in a ReLU net.
    :param base_dist: 'normal' or 'uniform', or a function taking (n_in, n_out) and returning a (n_in, n_out) array
    :param rng: Random number generator or seed
    :return: A shape (n_in, n_out) initial weight matrix.
    """
    rng = get_rng(rng)
    w_base = rng.randn(n_in, n_out) if base_dist == 'normal' else \
        (rng.rand(n_in, n_out) - 0.5) * np.sqrt(12) if base_dist == 'uniform' else \
        bad_value(base_dist)
    mag_number = \
        np.sqrt(2. / (n_in + n_out)) if mag in ('xavier', 'xavier-both') else \
        np.sqrt(1. / n_in) if mag == 'xavier-forward' else \
        np.sqrt(2. / n_in) if mag == 'xavier-relu' else \
        mag if isinstance(mag, numbers.Real) else \
        bad_value(mag)
    return w_base * mag_number
def lowpass_random(n_samples, cutoff, n_dim=None, rng=None, normalize=False, slope=0):
    """
    Return a random lowpass-filtered signal.
    :param n_samples: Number of samples in the signal
    :param cutoff: Normalized cutoff frequency, in the range 0 (pure DC) to 1 (sample frequency)
    :param n_dim: Number of dimensions (None for a 1-D signal)
    :param rng: A random number generator or seed
    :param normalize: True to divide by the standard deviation, or a (lower, upper) tuple to rescale into that range
    :param slope: Slope of a linear ramp added to the signal
    :return: An (n_samples, ) or (n_samples, n_dim) array
    """
    rng = get_rng(rng)
    assert 0 <= cutoff <= 1, "Cutoff must be in the range 0 (pure DC) to 1 (sample frequency)"
    base_signal = rng.randn(n_samples) if n_dim is None else rng.randn(n_samples, n_dim)
    lowpass_signal = lowpass(base_signal, cutoff)
    if normalize is True:
        lowpass_signal = lowpass_signal / np.std(lowpass_signal)
    elif isinstance(normalize, tuple):
        lower, upper = normalize
        minsig, maxsig = np.min(lowpass_signal, axis=0), np.max(lowpass_signal, axis=0)
        lowpass_signal = ((lowpass_signal - minsig) / (maxsig - minsig)) * (upper - lower) + lower
    if slope != 0:
        ramp = slope * np.arange(len(lowpass_signal))
        lowpass_signal = lowpass_signal + (ramp if n_dim is None else ramp[:, None])
    return lowpass_signal
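# Illustrative usage sketch: generate a slow 2-D random signal rescaled into [0, 1].  Assumes the `lowpass`
# and `get_rng` helpers from this repo are importable; the numbers here are arbitrary.
sig = lowpass_random(n_samples=1000, cutoff=0.02, n_dim=2, rng=1234, normalize=(0., 1.))
assert sig.shape == (1000, 2)
assert sig.min() >= 0. and sig.max() <= 1.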
def create_maxout_network(layer_sizes, maxout_widths, w_init, output_activation='maxout', rng=None, **other_args):
    rng = get_rng(rng)
    n_expected_maxout_widths = len(layer_sizes) - 1 if output_activation == 'maxout' else len(layer_sizes) - 2
    if isinstance(maxout_widths, (list, tuple)):
        assert len(maxout_widths) == n_expected_maxout_widths
    else:
        maxout_widths = [maxout_widths] * n_expected_maxout_widths
    # Note: we intentionally start the zip with maxout_widths, because it may be one element shorter than layer_sizes.
    weights = [w_init * rng.randn(n_maps, n_in, n_out)
               for n_maps, n_in, n_out in zip(maxout_widths, layer_sizes[:-1], layer_sizes[1:])]
    if output_activation != 'maxout':
        weights.append(w_init * rng.randn(layer_sizes[-2], layer_sizes[-1]))
    return MultiLayerPerceptron(weights=weights, hidden_activation='maxout', output_activation=output_activation, **other_args)
def initialize_network_params(layer_sizes, mag='xavier-both', base_dist='normal', include_biases=True, scale=1., rng=None):
    """
    Initialize parameters for a fully-connected neural network.
    :param layer_sizes: A list of integers indicating layer sizes (including that of the input layer)
    :param mag: The standard deviation, or a string identifying a method for selecting the standard deviation.
        String options can be:
        'xavier-forward': Best for preserving the variance of a linear, tanh, or sigmoidal network across layers.
        'xavier-both': A compromise between preserving the variance of the forward and backward passes.
        'xavier-relu': Best for preserving variance on the forward pass in a ReLU net.
    :param base_dist: 'normal' or 'uniform', or a function taking (n_in, n_out) and returning a (n_in, n_out) array
    :param include_biases: Also create initial biases.
    :param scale: A scale factor by which to multiply the initial weights.
    :param rng: A random number generator or seed
    :return: A list of 2-tuples of (weight, bias) parameters (if include_biases is True), otherwise a list of weight matrices.
        Note: To get the weights/biases in separate lists, simply go: weights, biases = zip(*initialize_network_params(...))
        Note: See http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-initialization
        for a good explanation of the 'xavier' initialization schemes.
    """
    rng = get_rng(rng)
    ws = [initialize_weight_matrix(n_in, n_out, mag=mag, base_dist=base_dist, scale=scale, rng=rng)
          for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])]
    if include_biases:
        bs = [np.zeros(n_out) for n_out in layer_sizes[1:]]
        return list(zip(ws, bs))
    else:
        return ws
def demo_settling_dynamics(symmetric=False, n_hidden=50, n_out=3, input_influence=0.01, learning_rate=0.0001,
                           cut_time=None, minibatch_size=1, decay=0.05, scale=.4, hidden_act='tanh',
                           output_act='lin', draw_every=10, n_steps=10000, seed=124):
    """
    Here we use Predictive Coding and compare the convergence of a predictive-coded network to one without.
    """
    rng = get_rng(seed)
    net_d = Network.from_init(symmetric=symmetric, n_hidden=n_hidden, n_out=n_out, scale=scale, fh=hidden_act,
                              fx=output_act, decay=decay, rng=rng)
    state_d = net_d.init_state(minibatch_size=minibatch_size)
    net_l = Network.from_init(symmetric=symmetric, n_hidden=n_hidden, n_out=n_out, scale=scale, fh=hidden_act,
                              fx=output_act, decay=decay, rng=rng, input_influence=input_influence,
                              learning_rate=learning_rate)
    state_l = net_l.init_state(minibatch_size=minibatch_size)
    sp = Speedometer()
    for t in range(n_steps):
        error = (state_d.x[0] - state_l.x[0]).mean()
        with hold_dbplots(draw_every=draw_every):
            dbplot(state_d.h[0], 'hd')
            dbplot(state_d.x[0], 'xd')
            dbplot(state_l.h[0], 'hl')
            dbplot(state_l.x[0], 'xl')
            dbplot(np.array([abs(net_l.w_hx).mean()]), 'wmag')
            dbplot(error, 'error')
        state_d = net_d.update(state_d)
        state_l = net_l.update(state_l, inp=state_d.x if cut_time is None or t < cut_time else None)
        if t % 100 == 0:
            print(f'Rate: {sp(t+1)} iter/s')
def proportional_random_assignment(length, split, rng):
    """
    Generate an integer array of the given length, with elements randomly assigned to 0...len(split), with the
    frequency of elements with value i proportional to split[i].  This is useful for splitting training/test sets.

    e.g.
        n_samples = 1000
        x = np.random.randn(n_samples, 4)
        y = np.random.randn(n_samples)
        subsets = proportional_random_assignment(n_samples, split=0.7, rng=1234)
        x_train = x[subsets==0]
        y_train = y[subsets==0]
        x_test = x[subsets==1]
        y_test = y[subsets==1]

    :param length: The length of the output array
    :param split: Either a list of ratios to assign to each group (must sum to at most 1), or a single float in
        (0, 1), which will indicate that we split into 2 groups.
    :param rng: A random number generator or seed.
    :return: An integer array.
    """
    rng = get_rng(rng)
    if isinstance(split, float):
        split = [split]
    assert 0 <= np.sum(split) <= 1, "The sum of elements in split: {} must be in [0, 1].  Got {}".format(split, np.sum(split))
    arr = np.zeros(length, dtype=int)
    cut_points = np.concatenate([np.round(np.cumsum(split) * length).astype(int), [length]])
    scrambled_indices = rng.permutation(length)
    for i, (c_start, c_end) in enumerate(zip(cut_points[:-1], cut_points[1:])):
        arr[scrambled_indices[c_start:c_end]] = i + 1  # Note: we skip zero since the array is already initialized to 0
    return arr
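# Illustrative usage sketch: a 3-way train/validation/test split.  The remaining mass (here 1 - 0.7 - 0.15 = 0.15)
# goes to the last group.  Assumes the function above is importable; the numbers are arbitrary.
subsets = proportional_random_assignment(length=1000, split=[0.7, 0.15], rng=1234)
train_ixs, val_ixs, test_ixs = (subsets == 0), (subsets == 1), (subsets == 2)
assert train_ixs.sum() + val_ixs.sum() + test_ixs.sum() == 1000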
def initialize_weight_matrix(n_in, n_out, mag='xavier', base_dist='normal', scale=1., rng=None):
    """
    Initialize a weight matrix
    :param n_in: Number of input units
    :param n_out: Number of output units
    :param mag: The magnitude, or a string identifying how to calculate the magnitude.  String options can be:
        'xavier-forward': Best for preserving the variance of a linear, tanh, or sigmoidal network across layers.
        'xavier-both': A compromise between preserving the variance of the forward and backward passes.
        'xavier-relu': Best for preserving variance on the forward pass in a ReLU net.
    :param base_dist: 'normal' or 'uniform', or a function taking (n_in, n_out) and returning a (n_in, n_out) array
    :param scale: A scale factor by which to multiply the magnitude.
    :param rng: Random number generator or seed
    :return: A shape (n_in, n_out) initial weight matrix.
    """
    rng = get_rng(rng)
    w_base = rng.randn(n_in, n_out) if base_dist == 'normal' else \
        (rng.rand(n_in, n_out) - 0.5) * np.sqrt(12) if base_dist == 'uniform' else \
        bad_value(base_dist)
    mag_number = \
        np.sqrt(2. / (n_in + n_out)) if mag in ('xavier', 'xavier-both') else \
        np.sqrt(1. / n_in) if mag == 'xavier-forward' else \
        np.sqrt(2. / n_in) if mag == 'xavier-relu' else \
        mag if isinstance(mag, numbers.Real) else \
        bad_value(mag)
    return w_base * (mag_number * scale)
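# Illustrative usage sketch: with mag='xavier-forward' the weights have standard deviation ~ 1/sqrt(n_in),
# so a single linear layer roughly preserves activation variance.  Assumes get_rng and numpy as used above.
w = initialize_weight_matrix(n_in=400, n_out=200, mag='xavier-forward', rng=1234)
x = np.random.randn(1000, 400)
print(x.std(), (x @ w).std())  # the two standard deviations should both be of order 1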
def lowpass_random(n_samples, cutoff, n_dim=None, rng=None, normalize=False, slope=0): """ Return a random lowpass-filtered signal. :param n_samples: :param cutoff: :param rng: :return: """ rng = get_rng(rng) assert 0 <= cutoff <= 1, "Cutoff must be in the range 0 (pure DC) to 1 (sample frequency)" base_signal = rng.randn(n_samples) if n_dim is None else rng.randn( n_samples, n_dim) lowpass_signal = lowpass(base_signal, cutoff) if normalize: lowpass_signal = lowpass_signal / np.std(lowpass_signal) if slope != 0: ramp = slope * np.arange(len(lowpass_signal)) lowpass_signal = lowpass_signal + (ramp if n_dim is None else ramp[:, None]) return lowpass_signal
def initialize_network_params(layer_sizes, mag='xavier-both', base_dist='normal', last_layer_zero=False,
                              include_biases=True, scale=1., rng=None):
    """
    Initialize parameters for a fully-connected neural network.
    :param layer_sizes: A list of integers indicating layer sizes (including that of the input layer)
    :param mag: The standard deviation, or a string identifying a method for selecting the standard deviation.
        String options can be:
        'xavier-forward': Best for preserving the variance of a linear, tanh, or sigmoidal network across layers.
        'xavier-both': A compromise between preserving the variance of the forward and backward passes.
        'xavier-relu': Best for preserving variance on the forward pass in a ReLU net.
    :param base_dist: 'normal' or 'uniform', or a function taking (n_in, n_out) and returning a (n_in, n_out) array
    :param last_layer_zero: If True, set the weights of the last layer to zero.
    :param include_biases: Also create initial biases.
    :param scale: A scale factor by which to multiply the initial weights.
    :param rng: A random number generator or seed
    :return: A list of 2-tuples of (weight, bias) parameters (if include_biases is True), otherwise a list of weight matrices.
        Note: To get the weights/biases in separate lists, simply go: weights, biases = zip(*initialize_network_params(...))
        Note: See http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-initialization
        for a good explanation of the 'xavier' initialization schemes.
    """
    rng = get_rng(rng)
    ws = [initialize_weight_matrix(n_in, n_out, mag=mag, base_dist=base_dist, scale=scale, rng=rng)
          for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])]
    if last_layer_zero:
        ws[-1][:] = 0
    if include_biases:
        bs = [np.zeros(n_out) for n_out in layer_sizes[1:]]
        return list(zip(ws, bs))
    else:
        return ws
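# Illustrative usage sketch: build parameters for a 784-256-10 network and split them into separate
# weight/bias lists, as suggested in the docstring above.
params = initialize_network_params(layer_sizes=[784, 256, 10], mag='xavier-both', rng=1234)
weights, biases = zip(*params)
print([w.shape for w in weights])   # [(784, 256), (256, 10)]
print([b.shape for b in biases])    # [(256,), (10,)]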
def scaled_quantized_forward_pass(inputs, weights, scales = None, biases = None, hidden_activations='relu', output_activations = 'relu', quantization_method = 'herd', rng=None): """ Return the activations from a forward pass of a ReLU net. :param inputs: A (n_frames, n_dims_in) array :param weights: A list of (n_dim_in, n_dim_out) weight matrices :param biases: An optional (len(weights)) list of (w.shape[1]) biases for each weight matrix :param hidden_activations: Indicates the hidden layer activation function :param output_activations: Indicates the output layer activation function :param quantization_method: The method for quantizing (see function: sequential_quantize) :param rng: A random number generator or seed :return: activations: A len(weights)*3+1 list of (n_frames, n_dims) activations. Elements [::3] will be a length(w)+1 list containing the input to each rounding unit, and the final output Elements [1::3] will be the length(w) rounded "spike" signal. Elements [2::3] will be the length(w) inputs to each nonlinearity """ rng = get_rng(rng) activations = [inputs] if biases is None: biases = [0]*len(weights) else: assert len(biases)==len(weights) if scales is None: scales = [1.]*len(weights) x = inputs # (n_samples, n_units) for i, (w, b, k) in enumerate(izip_equal(weights, biases, scales)): s = quantize(x*k, method=quantization_method, rng=rng) u = (s/k).dot(w)+b x = activation_function(u, output_activations if i==len(weights)-1 else hidden_activations) activations += [s, u, x] return activations
def get_synthetic_deep_data(n_samples, layer_sizes, hidden_activations='softplus', output_activation='linear', normalize=True, rng=1234): """ Generate data from a randomly initialized neural network. :param n_samples: Number of samples to generate :param layer_sizes: Sizes of network layers :param hidden_activations: Hidden activation functions :param output_activation: Output activation function :param normalize: Normalize the output over samples (remove global mean, divide by std) :param rng: A random number generator or seed. :return: x, y x is an (n_samples, layer_sizes[0]) array y is a (n_samples, layer_sizes[-1]) array """ rng = get_rng(rng) ws = initialize_network_params(layer_sizes=layer_sizes, mag='xavier-forward', include_biases=False, rng=rng) x = rng.randn(n_samples, layer_sizes[0]) y = forward_pass(input_data=x, weights=ws, hidden_activations=hidden_activations, output_activation=output_activation) if normalize: y = (y - y.mean(axis=0)) / y.std(axis=0) return x, y
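# Illustrative usage sketch: draw a small synthetic regression set from a random 20-100-3 network.
# Assumes initialize_network_params and forward_pass from this repo, as used above.
x, y = get_synthetic_deep_data(n_samples=500, layer_sizes=[20, 100, 3])
assert x.shape == (500, 20) and y.shape == (500, 3)
print(y.mean(axis=0), y.std(axis=0))  # ~0 and ~1 per output dimension, since normalize=True by default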
def __init__(self, kp, ki=0., kd=0., noise = 0., rng = None): self.kp = kp self.kd = kd self.ki = ki self.xp = 0 self.s = 0 self.noise = noise self.rng = get_rng(rng)
def __init__(self, kp, ki=0., kd=0., noise=0., rng=None): self.kp = kp self.kd = kd self.ki = ki self.xp = 0 self.s = 0 self.noise = noise self.rng = get_rng(rng)
def stochastically_rounded_relu_forward_pass_guts(weights, input_data, n_steps, rng = None): rng = get_rng(rng) activation = np.round(n_steps*input_data + rng.uniform(-.5, .5, size=input_data.shape))/float(n_steps) activations = [activation] for w in weights: u = np.maximum(0, activation.dot(w)) activation = np.round(u*n_steps + rng.uniform(-.5, .5, size=u.shape))/float(n_steps) activations.append(activation) return activations
def estimate_log_z(w, b_h, b_v, annealing_ratios, n_runs = 10, rng = None): """ Use Annealed importance sampling http://www.iro.umontreal.ca/~lisa/pointeurs/breuleux+bengio_nc2011.pdf To estimate the probability of the test data given the RBM parameters. This code is a Pythonified version of Russ Salakhutdinov's Matlab code: http://www.utstat.toronto.edu/~rsalakhu/code_AIS/RBM_AIS.m NOTE: THIS CODE DOES NOT SEEM TO BE PRODUCING GOOD RESULTS (They don't match with exact numbers. Not sure why!) Better option: Use the rbm_ais method from pylearn2 (from pylearn2.rbm_tools import rbm_ais) :param w: Weights (n_visible, n_hidden) :param b_h: Hidden biases (n_hidden) :param b_v: Visible biases (n_visible) :param annealing_ratios: A monotonically increasing vector from 0 to 1 :param n_runs: Number of annealing chains to use. :param rng: Random Number generator :return: """ assert annealing_ratios[0]==0 and annealing_ratios[-1]==1 and np.all(np.diff(annealing_ratios)>0) rng = get_rng(rng) n_visible, n_hidden = w.shape visbiases_base = np.zeros_like(b_v) neg_data = rng.rand(n_runs, n_visible) < sigm(visbiases_base) # Collect logww = - neg_data.dot(visbiases_base) - n_hidden*np.log(2) w_h = neg_data.dot(w)+b_h bv_base = neg_data.dot(visbiases_base) bee_vee = bv_base for t, r in enumerate(annealing_ratios): exp_wh = np.exp(r*w_h) logww += (1-r)*bv_base + r*bee_vee + np.sum(np.log(1+exp_wh), axis =1) wake_hid_probs = exp_wh/(1+exp_wh) wake_hid_states = wake_hid_probs > rng.rand(*wake_hid_probs.shape) neg_vis_probs = sigm((1-r)*visbiases_base + r*(wake_hid_states.dot(w.T)+b_v)) neg_vis_states = neg_vis_probs > rng.rand(*neg_vis_probs.shape) w_h = neg_vis_states.dot(w)+b_h bv_base = neg_vis_states.dot(visbiases_base) bee_vee = neg_vis_states.dot(b_v) exp_wh = np.exp(r*w_h) logww -= (1-r)*bv_base + r*bee_vee + np.sum(np.log(1+exp_wh), axis = 1) exp_wh = np.exp(w_h) logww += neg_data.dot(b_v) + np.sum(np.log(1+exp_wh), axis = 1) np.mean(logww) r_ais = logsumexp(logww) - np.log(n_runs) log_z_base = np.sum(np.log(1+np.exp(visbiases_base))) + n_hidden*np.log(2) log_z_est = r_ais + log_z_base aa = np.mean(logww) logstd_AIS = np.log(np.std(np.exp(logww-aa))) + aa - np.log(n_runs)/2 logZZ_est_up = logsumexp([np.log(3)+logstd_AIS, r_ais], axis = 0) + log_z_base logZZ_est_down = logdiffexp([(np.log(3)+logstd_AIS), r_ais], axis = 0) + log_z_base return log_z_est, (logZZ_est_up, logZZ_est_down)
def initialize_conv_kernel(kernel_shape, mag='xavier', rng=None):
    rng = get_rng(rng)
    if mag == 'xavier':
        n_kern_out, n_kern_in, k_size_y, k_size_x = kernel_shape
        fan_in = k_size_y * k_size_x * n_kern_in
        fan_out = n_kern_out * k_size_y * k_size_x
        mag = np.sqrt(2. / (fan_in + fan_out))
    else:
        assert isinstance(mag, (int, float)), mag
    return mag * rng.randn(*kernel_shape)
def initialize_conv_kernel(kernel_shape, mag='xavier', rng=None):
    rng = get_rng(rng)
    if mag == 'xavier':
        n_kern_out, n_kern_in, k_size_y, k_size_x = kernel_shape
        fan_in = k_size_y * k_size_x * n_kern_in
        fan_out = n_kern_out * k_size_y * k_size_x
        mag = np.sqrt(2. / (fan_in + fan_out))
    else:
        assert isinstance(mag, (int, float)), mag
    return mag * rng.randn(*kernel_shape)
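# Illustrative usage sketch: initialize a (n_out_maps, n_in_maps, ky, kx) = (32, 3, 5, 5) kernel with
# Xavier scaling, or with an explicit magnitude.  Shapes follow the convention used above.
k_xavier = initialize_conv_kernel((32, 3, 5, 5), mag='xavier', rng=1234)
k_fixed = initialize_conv_kernel((32, 3, 5, 5), mag=0.01, rng=1234)
assert k_xavier.shape == k_fixed.shape == (32, 3, 5, 5)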
def __init__(self, precision=1., threshold=0.5, values=None, phi_init=0, rng=None): assert values is None or len(values) == 2 self.rng = get_rng(rng) self.phi = phi_init self.precision = precision self.threshold = threshold self.values = values
def demo_variational_autoencoder(minibatch_size=100, n_epochs=2000, plot_interval=100, seed=None):
    """
    Train a Variational Autoencoder on MNIST and look at the samples it generates.
    :param minibatch_size: Number of elements in the minibatch
    :param n_epochs: Number of passes through dataset
    :param plot_interval: Plot every x iterations
    """
    data = get_mnist_dataset(flat=True).training_set.input

    if is_test_mode():
        n_epochs = 1
        minibatch_size = 10
        data = data[:100]

    rng = get_rng(seed)

    model = VariationalAutoencoder(
        pq_pair=EncoderDecoderNetworks(
            x_dim=data.shape[1],
            z_dim=20,
            encoder_hidden_sizes=[200],
            decoder_hidden_sizes=[200],
            w_init=lambda n_in, n_out: 0.01 * np.random.randn(n_in, n_out),
            x_distribution='bernoulli',
            z_distribution='gaussian',
            hidden_activation='softplus'),
        optimizer=AdaMax(alpha=0.003),
        rng=rng)

    training_fcn = model.train.compile()
    sampling_fcn = model.sample.compile()

    for i, minibatch in enumerate(minibatch_iterate(data, minibatch_size=minibatch_size, n_epochs=n_epochs)):
        training_fcn(minibatch)
        if i % plot_interval == 0:
            print('Epoch %s' % (i * minibatch_size / float(len(data)), ))
            samples = sampling_fcn(25).reshape(5, 5, 28, 28)
            dbplot(samples, 'Samples from Model')
            dbplot(model.pq_pair.p_net.parameters[-2].get_value()[:25].reshape(-1, 28, 28), 'dec')
            dbplot(model.pq_pair.q_net.parameters[0].get_value().T[:25].reshape(-1, 28, 28), 'enc')
def initialize_params(layer_sizes: Sequence[int], initial_weight_scale=1., rng=None) -> Sequence[LayerParams]:
    rng = get_rng(rng)
    ws = [uniform(low=-initial_weight_scale * (6. / (n_pre + n_post)) ** .5,
                  high=initial_weight_scale * (6. / (n_pre + n_post)) ** .5,
                  size=(n_pre, n_post))
          for n_pre, n_post in izip_equal(layer_sizes[:-1], layer_sizes[1:])]
    bs = [torch.zeros(n_post) for n_post in layer_sizes[1:]]
    return _params_vals_to_params(ws, bs)
def from_initializer(cls, n_in, n_out, w_init_mag=0.01, rng=None, **kwargs): rng = get_rng(rng) return cls(w=w_init_mag * rng.randn(n_in, n_out), b=np.zeros(n_out), w_rev=w_init_mag * rng.randn(n_out, n_in), b_rev=np.zeros(n_in), rng=rng, **kwargs)
def get_vgg_video_splice(video_identifiers, shuffle=False, shuffling_rng=None): videos = np.concatenate([ load_ilsvrc_video(identifier, size=(224, 224)) for identifier in video_identifiers ]) vgg_mode_videos = im2vgginput(videos) if shuffle: rng = get_rng(shuffling_rng) rng.shuffle(vgg_mode_videos) return videos, vgg_mode_videos
def discretize(x, approach='noisy-round', scale=1, rng=None):
    rng = get_rng(rng)  # Accepts a seed, an existing generator, or None
    if approach == 'noisy-round':
        return np.round(x * scale + rng.uniform(low=-.5, high=.5, size=x.shape)) / scale
    elif approach == 'round':
        return np.round(x * scale) / scale
    elif approach == 'noisy-add':
        return x + rng.uniform(-.5, .5, size=x.shape) / scale
    elif approach == 'surrogate-noise':
        return x + (12 ** .5) * ((x % 1) - (x % 1) ** 2) * rng.uniform(low=-.5, high=.5, size=x.shape) / scale
    else:
        raise Exception('No discretization approach: %s' % approach)
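# Illustrative usage sketch: with the plain 'round' approach the quantization error is bounded by
# 1/(2*scale), so increasing the scale gives a finer discretization.  Numbers here are arbitrary.
x = np.linspace(0, 1, 101)
for s in (1, 4, 16):
    err = np.max(np.abs(discretize(x, approach='round', scale=s) - x))
    print(s, err)  # errors of at most 0.5, 0.125, 0.03125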
def stochastically_rounded_relu_forward_pass_guts(weights, input_data, n_steps, rng=None): rng = get_rng(rng) activation = np.round(n_steps * input_data + rng.uniform( -.5, .5, size=input_data.shape)) / float(n_steps) activations = [activation] for w in weights: u = np.maximum(0, activation.dot(w)) activation = np.round( u * n_steps + rng.uniform(-.5, .5, size=u.shape)) / float(n_steps) activations.append(activation) return activations
def lowpass_random(n_samples, cutoff, n_dim=None, rng = None, normalize = False, slope=0): """ Return a random lowpass-filtered signal. :param n_samples: :param cutoff: :param rng: :return: """ rng = get_rng(rng) assert 0<=cutoff<=1, "Cutoff must be in the range 0 (pure DC) to 1 (sample frequency)" base_signal = rng.randn(n_samples) if n_dim is None else rng.randn(n_samples, n_dim) lowpass_signal = lowpass(base_signal, cutoff) if normalize: lowpass_signal = lowpass_signal/np.std(lowpass_signal) if slope != 0: ramp = slope*np.arange(len(lowpass_signal)) lowpass_signal = lowpass_signal+(ramp if n_dim is None else ramp[:, None]) return lowpass_signal
def from_init(cls, n_hidden, n_out, b_h=0, rng=None, scale=.1, symmetric=False, **kwargs): rng = get_rng(rng) w_hh = scale * initialize_weight_matrix( n_in=n_hidden, n_out=n_hidden, rng=rng) if symmetric: w_hh = .5 * (w_hh + w_hh.T) w_hx = scale * initialize_weight_matrix( n_in=n_hidden, n_out=n_out, rng=rng) w_xh = w_hx.T if symmetric else scale * initialize_weight_matrix( n_in=n_out, n_out=n_hidden, rng=rng) return Network(w_hh=w_hh, w_hx=w_hx, w_xh=w_xh, b_h=b_h, **kwargs)
def from_init(specifiers, input_shape, w_init=0.01, force_shared_parameters=True, rng=None): """ Convenient initialization function. :param specifiers: :param input_shape: :param w_init: :param force_shared_parameters: Use shared parameters for conv layer (allows training). :param rng: :return: """ rng = get_rng(rng) n_maps, n_rows, n_cols = input_shape layers = OrderedDict() if isinstance(specifiers, (list, tuple)): specifiers = OrderedDict(enumerate(specifiers)) for spec_name, spec in specifiers.iteritems(): if isinstance(spec, ConvInitSpec): spec = ConvolverSpec( w=w_init * rng.randn(spec.n_maps, n_maps, spec.filter_size[0], spec.filter_size[1]), b=np.zeros(spec.n_maps) if spec.use_bias else False, mode=spec.mode) if isinstance(spec, ConvolverSpec): n_maps = spec.w.shape[0] if spec.mode == 'valid': n_rows += -spec.w.shape[2] + 1 n_cols += -spec.w.shape[3] + 1 elif isinstance(spec.mode, int): n_rows += -spec.w.shape[2] + 1 + spec.mode * 2 n_cols += -spec.w.shape[3] + 1 + spec.mode * 2 elif isinstance(spec, PoolerSpec): n_rows /= spec.region[0] n_cols /= spec.region[1] layers[spec_name] = specifier_to_layer( spec, force_shared_parameters=force_shared_parameters, rng=rng) LOGGER.info('Layer "%s" (%s) output shape: %s' % (spec_name, spec.__class__.__name__, (n_maps, n_rows, n_cols))) return ConvNet(layers)
def scaled_quantized_forward_pass(inputs, weights, scales=None, biases=None, hidden_activations='relu', output_activations='relu', quantization_method='herd', rng=None): """ Return the activations from a forward pass of a ReLU net. :param inputs: A (n_frames, n_dims_in) array :param weights: A list of (n_dim_in, n_dim_out) weight matrices :param biases: An optional (len(weights)) list of (w.shape[1]) biases for each weight matrix :param hidden_activations: Indicates the hidden layer activation function :param output_activations: Indicates the output layer activation function :param quantization_method: The method for quantizing (see function: sequential_quantize) :param rng: A random number generator or seed :return: activations: A len(weights)*3+1 list of (n_frames, n_dims) activations. Elements [::3] will be a length(w)+1 list containing the input to each rounding unit, and the final output Elements [1::3] will be the length(w) rounded "spike" signal. Elements [2::3] will be the length(w) inputs to each nonlinearity """ rng = get_rng(rng) activations = [inputs] if biases is None: biases = [0] * len(weights) else: assert len(biases) == len(weights) if scales is None: scales = [1.] * len(weights) x = inputs # (n_samples, n_units) for i, (w, b, k) in enumerate(izip_equal(weights, biases, scales)): s = quantize(x * k, method=quantization_method, rng=rng) u = (s / k).dot(w) + b x = activation_function( u, output_activations if i == len(weights) - 1 else hidden_activations) activations += [s, u, x] return activations
def get_synthetic_deep_data(n_samples, layer_sizes, hidden_activations='softplus', output_activation='linear', normalize = True, rng=1234): """ Generate data from a randomly initialized neural network. :param n_samples: Number of samples to generate :param layer_sizes: Sizes of network layers :param hidden_activations: Hidden activation functions :param output_activation: Output activation function :param normalize: Normalize the output over samples (remove global mean, divide by std) :param rng: A random number generator or seed. :return: x, y x is an (n_samples, layer_sizes[0]) array y is a (n_samples, layer_sizes[-1]) array """ rng = get_rng(rng) ws = initialize_network_params(layer_sizes=layer_sizes, mag = 'xavier-forward', include_biases=False, rng=rng) x = rng.randn(n_samples, layer_sizes[0]) y = forward_pass(input_data=x, weights=ws, hidden_activations=hidden_activations, output_activation=output_activation) if normalize: y = (y - y.mean(axis=0))/y.std(axis=0) return x, y
def __init__(self, layers, optimizer, layerwise_scales=True, corruption_type='round', rng=None):
    """
    layers is an OrderedDict of callables.
    """
    assert layerwise_scales, 'Only layerwise scales are supported for now.'
    if isinstance(layers, (list, tuple)):
        layers = OrderedDict(enumerate(layers))
    else:
        assert isinstance(layers, OrderedDict), "Layers must be presented as a list, tuple, or OrderedDict"
    self.layers = layers
    self.optimizer = optimizer
    self.layerwise_scales = layerwise_scales
    self.corruption_type = corruption_type
    self.rng = get_rng(rng)
def proportional_random_assignment(length, split, rng):
    """
    Generate an integer array of the given length, with elements randomly assigned to 0...len(split), with the
    frequency of elements with value i proportional to split[i].  This is useful for splitting training/test sets.

    e.g.
        n_samples = 1000
        x = np.random.randn(n_samples, 4)
        y = np.random.randn(n_samples)
        subsets = proportional_random_assignment(n_samples, split=0.7, rng=1234)
        x_train = x[subsets==0]
        y_train = y[subsets==0]
        x_test = x[subsets==1]
        y_test = y[subsets==1]

    :param length: The length of the output array
    :param split: Either a list of ratios to assign to each group (must sum to at most 1), or a single float in
        (0, 1), which will indicate that we split into 2 groups.
    :param rng: A random number generator or seed.
    :return: An integer array.
    """
    rng = get_rng(rng)
    if isinstance(split, float):
        split = [split]
    assert 0 <= np.sum(split) <= 1, "The sum of elements in split: {} must be in [0, 1].  Got {}".format(split, np.sum(split))
    arr = np.zeros(length, dtype=int)
    cut_points = np.concatenate([np.round(np.cumsum(split) * length).astype(int), [length]])
    scrambled_indices = rng.permutation(length)
    for i, (c_start, c_end) in enumerate(zip(cut_points[:-1], cut_points[1:])):
        arr[scrambled_indices[c_start:c_end]] = i + 1  # Note: we skip zero since the array is already initialized to 0
    return arr
def sequential_quantize(v, n_steps = None, method='herd', rng = None): """ :param v: A (..., n_samples, n_units, ) array :param n_steps: The number of steps to spike for :return: An (..., n_steps, n_units) array of quantized values """ rng = get_rng(rng) assert v.ndim>=2 if n_steps is None: n_steps = v.shape[-2] else: assert n_steps == v.shape[-2] if method=='herd': result = fixed_diff(np.round(np.cumsum(v, axis=-2)), axis=-2) elif method=='herd2': result = fixed_diff(fixed_diff(np.round(np.cumsum(np.cumsum(v, axis=-2), axis=-2)), axis=-2), axis=-2) elif method=='round': result = np.round(v) elif method == 'slippery.9': result = slippery_round(v, slip=0.9) elif method == 'slippery.5': result = slippery_round(v, slip=0.5) elif method == 'randn': result = v + rng.randn(*v.shape) elif method=='uniform': result = v + rng.uniform(-.5, .5, size=v.shape) elif method=='surrogate-noise': result = v + (12**.5)*((v%1)-(v%1)**2)*rng.uniform(low=-.5, high=.5, size=v.shape) elif method == 'surrogate-sqrt': result = v + np.sqrt((12**.5)*((v%1)-(v%1)**2)*rng.uniform(low=-.5, high=.5, size=v.shape)) elif method is None: result = v else: raise NotImplementedError("Don't have quantization method '%s' implemented" % (method, )) return result
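# Illustrative usage sketch of the 'herd' quantization: the running sum of the quantized signal tracks the
# running sum of the original to within rounding error, which is what makes herding useful for spiking nets.
# This assumes fixed_diff(..., axis=-2) is the inverse of np.cumsum along that axis, as it is used above,
# and that get_rng returns a generator with a .rand method.
v = get_rng(1234).rand(100, 5) * 0.3           # a (n_steps, n_units) signal
q = sequential_quantize(v, method='herd')
assert np.max(np.abs(np.cumsum(q, axis=0) - np.cumsum(v, axis=0))) <= 0.5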
def experiment_mnist_eqprop( layer_constructor, n_epochs=10, hidden_sizes=(500, ), minibatch_size=20, beta=.5, random_flip_beta=True, learning_rate=.05, n_negative_steps=20, n_positive_steps=4, initial_weight_scale=1., online_checkpoints_period=None, epoch_checkpoint_period=.25, skip_zero_epoch_test=False, n_test_samples=None, prop_direction: Union[str, Tuple] = 'neutral', bidirectional=True, renew_activations=True, do_fast_forward_pass=False, rebuild_coders=True, l2_loss=None, splitstream=True, seed=1234, ): """ Replicate the results of Scellier & Bengio: Equilibrium Propagation: Bridging the Gap between Energy-Based Models and Backpropagation https://www.frontiersin.org/articles/10.3389/fncom.2017.00024/full Specifically, the train_model demo here: https://github.com/bscellier/Towards-a-Biologically-Plausible-Backprop Differences between our code and theirs: - We do not keep persistent layer activations tied to data points over epochs. So our results should only really match for the first epoch. - We evaluate training score periodically, rather than online average (however you can see online score by setting online_checkpoints_period to something that is not None) """ print('Params:\n' + '\n'.join(list(f' {k} = {v}' for k, v in locals().items()))) rng = get_rng(seed) n_in = 784 n_out = 10 dataset = get_mnist_dataset(flat=True, n_test_samples=None).to_onehot() x_train, y_train = dataset.training_set.xy x_test, y_test = dataset.test_set.xy # Their 'validation set' is our 'test set' if is_test_mode(): x_train, y_train, x_test, y_test = x_train[: 100], y_train[: 100], x_test[: 100], y_test[: 100] n_epochs = 1 layer_sizes = [n_in] + list(hidden_sizes) + [n_out] rng = get_rng(rng) y_train = y_train.astype(np.float32) ra = RunningAverage() sp = Speedometer(mode='last') is_online_checkpoint = Checkpoints( online_checkpoints_period, skip_first=skip_zero_epoch_test ) if online_checkpoints_period is not None else lambda: False is_epoch_checkpoint = Checkpoints(epoch_checkpoint_period, skip_first=skip_zero_epoch_test) results = Duck() training_states = initialize_states( layer_constructor=layer_constructor, n_samples=minibatch_size, params=initialize_params(layer_sizes=layer_sizes, initial_weight_scale=initial_weight_scale, rng=rng)) if isinstance(prop_direction, str): fwd_prop_direction, backward_prop_direction = prop_direction, prop_direction else: fwd_prop_direction, backward_prop_direction = prop_direction for i, (ixs, info) in enumerate( minibatch_index_info_generator(n_samples=x_train.shape[0], minibatch_size=minibatch_size, n_epochs=n_epochs)): epoch = i * minibatch_size / x_train.shape[0] if is_epoch_checkpoint(epoch): n_samples = n_test_samples if n_test_samples is not None else len( x_test) y_pred_test, y_pred_train = [ run_inference( x_data=x[:n_test_samples], states=initialize_states( layer_constructor=layer_constructor, params=[s.params for s in training_states], n_samples=min(len(x), n_test_samples) if n_test_samples is not None else len(x)), n_steps=n_negative_steps, prop_direction=fwd_prop_direction, ) for x in (x_test, x_train) ] # y_pred_train = run_inference(x_data=x_train[:n_test_samples], states=initialize_states(params=[s.params for s in training_states], n_samples=min(len(x_train), n_test_samples) if n_test_samples is not None else len(x_train))) test_error = percent_argmax_incorrect(y_pred_test, y_test[:n_test_samples]) train_error = percent_argmax_incorrect(y_pred_train, y_train[:n_test_samples]) print( f'Epoch: {epoch:.3g}, Iter: {i}, Test Error: {test_error:.3g}%, Train 
Error: {train_error:.3g}, Mean Rate: {sp(i):.3g}iter/s' ) results[next, :] = dict(iter=i, epoch=epoch, train_error=train_error, test_error=test_error) yield results if epoch > 2 and train_error > 50: return # The Original training loop, just taken out here: x_data_sample, y_data_sample = x_train[ixs], y_train[ixs] training_states = run_eqprop_training_update( x_data=x_data_sample, y_data=y_data_sample, layer_states=training_states, beta=beta, random_flip_beta=random_flip_beta, learning_rate=learning_rate, layer_constructor=layer_constructor, bidirectional=bidirectional, l2_loss=l2_loss, renew_activations=renew_activations, n_negative_steps=n_negative_steps, n_positive_steps=n_positive_steps, prop_direction=prop_direction, splitstream=splitstream, rng=rng) this_train_score = ra( percent_argmax_correct(output_from_state(training_states), y_train[ixs])) if is_online_checkpoint(): print( f'Epoch {epoch:.3g}: Iter {i}: Score {this_train_score:.3g}%: Mean Rate: {sp(i):.2g}' )
def init_state(self, minibatch_size, rng=None): rng = get_rng(rng) n_hidden, n_out = self.w_hx.shape return NetworkState(h=rng.randn(minibatch_size, n_hidden), x=rng.randn(minibatch_size, n_out))
def demo_quantized_convergence( quantized_layer_constructor, smooth_epsilon=0.5, layer_sizes=(500, 500, 10), initialize_acts_randomly=False, minibatch_size=1, # n_steps = 100, n_steps=10000, initial_weight_scale=1., prop_direction='neutral', data_seed=1241, param_seed=1237, hang=True, plot=False): """ """ smooth_layer_constructor = SimpleLayerController.get_partial_constructor( epsilon=smooth_epsilon) print('Params:\n' + '\n'.join(list(f' {k} = {v}' for k, v in locals().items()))) data_rng = get_rng(data_seed) param_rng = get_rng(param_seed) HISTORY_LEN = n_steps N_NEURONS_TO_PLOT = 10 if is_test_mode(): n_steps = 10 pi = ProgressIndicator(update_every='2s', expected_iterations=2 * n_steps) n_in, n_out = layer_sizes[0], layer_sizes[-1] x_data = data_rng.rand(minibatch_size, n_in) params = initialize_params(layer_sizes=layer_sizes, initial_weight_scale=initial_weight_scale, rng=param_rng) def run_update(layer_constructor, mode): plt.gca().set_prop_cycle(None) states = initialize_states(layer_constructor=layer_constructor, n_samples=minibatch_size, params=params) for t in range(n_steps): states = eqprop_step(layer_states=states, x_data=x_data, beta=0, y_data=None, direction=prop_direction) acts = [s.potential for s in states] yield acts if plot: dbplot_collection( [a[0, :N_NEURONS_TO_PLOT] for a in acts], f'{mode} acts', axis='acts', draw_every='5s', cornertext=f'Negative Phase: {t}', plot_type=lambda: MovingPointPlot( buffer_len=HISTORY_LEN, plot_kwargs=dict(linestyle='-.' if mode == 'Smooth' else '-'), reset_color_cycle=True)) # dbplot_collection([a[0, :N_NEURONS_TO_PLOT] for a in acts], f'{mode} acts', axis='acts', draw_every=1, cornertext=f'Negative Phase: {t}', plot_type = lambda: MovingPointPlot(buffer_len=HISTORY_LEN, plot_kwargs=dict(linestyle = '-.' if mode=='Smooth' else '-'), reset_color_cycle=True)) pi() smooth_record = list( run_update(layer_constructor=smooth_layer_constructor, mode='Smooth')) smooth_acts = smooth_record[-1] rough_record = list( run_update(layer_constructor=quantized_layer_constructor, mode='Rough')) rough_acts = rough_record[-1] rs_online_errors = np.array( [[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)] for hs_rough, hs_smooth in zip(rough_record, smooth_record)]) rs_end_errors = np.array( [[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)] for hs_smooth in [smooth_record[-1]] for hs_rough in rough_record]) rr_end_errors = np.array( [[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)] for hs_smooth in [rough_record[-1]] for hs_rough in rough_record]) ss_end_errors = np.array( [[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)] for hs_smooth in [smooth_record[-1]] for hs_rough in smooth_record]) mean_abs_error = np.mean(rs_online_errors, axis=0) final_abs_error = rs_online_errors[-1] print( f'Mean Abs Layerwise Errors: {np.array_str(mean_abs_error, precision=5)}\t Final Layerwise Errors: {np.array_str(final_abs_error, precision=5)}' ) return rs_online_errors, rs_end_errors, rr_end_errors, ss_end_errors
def experiment_mnist_eqprop_torch( layer_constructor: Callable[[int, LayerParams], IDynamicLayer], n_epochs=10, hidden_sizes=(500, ), minibatch_size=10, # update mini-batch size batch_size=500, # total batch size beta=.5, random_flip_beta=True, learning_rate=.05, n_negative_steps=120, n_positive_steps=80, initial_weight_scale=1., online_checkpoints_period=None, epoch_checkpoint_period=1.0, #'100s', #{0: .25, 1: .5, 5: 1, 10: 2, 50: 4}, skip_zero_epoch_test=False, n_test_samples=10000, prop_direction: Union[str, Tuple] = 'neutral', bidirectional=True, renew_activations=True, do_fast_forward_pass=False, rebuild_coders=True, l2_loss=None, splitstream=False, seed=1234, prediction_inp_size=17, ## prediction input size delay=18, ## delay size for the clamped phase pred=True, ## if you want to use the prediction check_flg=False, ): """ Replicate the results of Scellier & Bengio: Equilibrium Propagation: Bridging the Gap between Energy-Based Models and Backpropagation https://www.frontiersin.org/articles/10.3389/fncom.2017.00024/full Specifically, the train_model demo here: https://github.com/bscellier/Towards-a-Biologically-Plausible-Backprop Differences between our code and theirs: - We do not keep persistent layer activations tied to data points over epochs. So our results should only really match for the first epoch. - We evaluate training score periodically, rather than online average (however you can see online score by setting online_checkpoints_period to something that is not None) """ torch.manual_seed(seed) device = 'cuda' if torch.cuda.is_available( ) and USE_CUDA_WHEN_AVAILABLE else 'cpu' if device == 'cuda': torch.set_default_tensor_type(torch.cuda.FloatTensor) print(f'Using Device: {device}') print('Params:\n' + '\n'.join(list(f' {k} = {v}' for k, v in locals().items()))) rng = get_rng(seed) n_in = 784 n_out = 10 dataset = input_data.read_data_sets('MNIST_data', one_hot=True) x_train, y_train = torch.tensor( dataset.train.images, dtype=torch.float32 ).to(device), torch.tensor(dataset.train.labels, dtype=torch.float32).to( device ) #(torch.tensor(a.astype(np.float32)).to(device) for a in dataset.mnist.train.images.xy) x_test, y_test = torch.tensor( dataset.test.images, dtype=torch.float32).to(device), torch.tensor( dataset.test.labels, dtype=torch.float32).to( device) # Their 'validation set' is our 'test set' x_val, y_val = torch.tensor( dataset.validation.images, dtype=torch.float32).to(device), torch.tensor( dataset.validation.labels, dtype=torch.float32).to( device) # Their 'validation set' is our 'test set' if is_test_mode(): x_train, y_train, x_test, y_test, x_val, y_val = x_train[: 100], y_train[: 100], x_test[: 100], y_test[: 100], x_val[: 100], y_val[: 100] n_epochs = 1 n_negative_steps = 3 n_positive_steps = 3 layer_sizes = [n_in] + list(hidden_sizes) + [n_out] ra = RunningAverage() sp = Speedometer(mode='last') is_online_checkpoint = Checkpoints( online_checkpoints_period, skip_first=skip_zero_epoch_test ) if online_checkpoints_period is not None else lambda: False is_epoch_checkpoint = Checkpoints(epoch_checkpoint_period, skip_first=skip_zero_epoch_test) training_states = initialize_states( layer_constructor=layer_constructor, #n_samples=minibatch_size, n_samples=batch_size, params=initialize_params(layer_sizes=layer_sizes, initial_weight_scale=initial_weight_scale, rng=rng)) # dbplot(training_states[0].params.w_fore[:10, :10], str(rng.randint(265))) if isinstance(prop_direction, str): fwd_prop_direction, backward_prop_direction = prop_direction, prop_direction else: 
fwd_prop_direction, backward_prop_direction = prop_direction def do_test(): # n_samples = n_test_samples if n_test_samples is not None else len(x_test) test_error, train_error, val_error = [ percent_argmax_incorrect( run_inference( x_data=x[:n_test_samples], states=initialize_states( layer_constructor=layer_constructor, params=[s.params for s in training_states], n_samples=n_samples), n_steps=n_negative_steps, prop_direction=fwd_prop_direction, ), y[:n_samples]).item() for x, y in [(x_test, y_test), (x_train, y_train), (x_val, y_val)] for n_samples in [ min(len(x), n_test_samples ) if n_test_samples is not None else len(x) ] ] # Not an actal loop... just hack for assignment in comprehensions print( f'Epoch: {epoch:.3g}, Iter: {i}, Test Error: {test_error:.3g}%, Train Error: {train_error:.3g}, Validation Error: {val_error:.3g}, Mean Rate: {sp(i):.3g}iter/s' ) return dict(iter=i, epoch=epoch, train_error=train_error, test_error=test_error, val_error=val_error), train_error, test_error, val_error results = Duck() pi = ProgressIndicator(expected_iterations=n_epochs * dataset.train.num_examples / minibatch_size, update_every='10s') dy_squared = [] dy_squared.append(None) dy_squared.append(None) for i, (ixs, info) in enumerate( minibatch_index_info_generator(n_samples=x_train.size()[0], minibatch_size=batch_size, n_epochs=n_epochs)): epoch = i * batch_size / x_train.shape[0] if is_epoch_checkpoint(epoch): check_flg = False x_train, y_train = shuffle_data(x_train, y_train) with pi.pause_measurement(): results[next, :], train_err, test_err, val_err = do_test() ## prepare for saving the parameters ws, bs = zip(*((s.params.w_aft, s.params.b) for s in training_states[1:])) f = None if os.path.isfile(directory + '/log.txt'): f = open(directory + '/log.txt', 'a') else: os.mkdir(directory) f = open(directory + '/log.txt', 'w') f.write("Epoch: " + str(epoch) + '\n') f.write("accuracy for training: " + str(train_err) + '\n') f.write("accuracy for testing: " + str(test_err) + '\n') f.write("accuracy for validation: " + str(val_err) + '\n') f.close() np.save(directory + '/w_epoch_' + str(epoch) + '.npy', ws) np.save(directory + '/b_epoch_' + str(epoch) + '.npy', bs) np.save(directory + '/dy_squared_epoch_' + str(epoch) + '.npy', dy_squared) yield results if epoch > 100 and results[-1, 'train_error'] > 50: return # The Original training loop, just taken out here: ixs = ixs.astype(np.int32) # this is for python version 3.7 x_data_sample, y_data_sample = x_train[ixs], y_train[ixs] training_states, dy_squared = run_eqprop_training_update( x_data=x_data_sample, y_data=y_data_sample, layer_states=training_states, beta=beta, random_flip_beta=random_flip_beta, learning_rate=learning_rate, layer_constructor=layer_constructor, bidirectional=bidirectional, l2_loss=l2_loss, renew_activations=renew_activations, n_negative_steps=n_negative_steps, n_positive_steps=n_positive_steps, prop_direction=prop_direction, splitstream=splitstream, rng=rng, prediction_inp_size=prediction_inp_size, delay=delay, device=device, epoch_check=check_flg, epoch=epoch, pred=pred, batch_size=batch_size, minibatch_size=minibatch_size, dy_squared=dy_squared) check_flg = False this_train_score = ra( percent_argmax_incorrect(output_from_state(training_states), y_train[ixs])) if is_online_checkpoint(): print( f'Epoch {epoch:.3g}: Iter {i}: Score {this_train_score:.3g}%: Mean Rate: {sp(i):.2g}' ) pi.print_update(info=f'Epoch: {epoch}') results[next, :], train_err, test_err, val_err = do_test() yield results
def run_eqprop_training_update(x_data, y_data, layer_states: Sequence[IDynamicLayer], beta: float, random_flip_beta: bool, learning_rate: float, n_negative_steps: int, n_positive_steps: int, layer_constructor: Optional[Callable[ [int, LayerParams], IDynamicLayer]] = None, bidirectional: bool = True, l2_loss: Optional[float] = None, renew_activations: bool = True, prop_direction=PropDirectionOptions.NEUTRAL, splitstream=False, rng=None) -> Sequence[IDynamicLayer]: if isinstance(prop_direction, (list, tuple)): negative_prop_direction, positive_prop_direction = prop_direction else: negative_prop_direction, positive_prop_direction = prop_direction, prop_direction rng = get_rng(rng) this_beta = beta * (torch.randint(2, size=()).float() * 2 - 1) if random_flip_beta else beta negative_states = last( run_negative_phase(x_data=x_data, layer_states=layer_states, n_steps=n_negative_steps, prop_direction=negative_prop_direction)) positive_states = last( run_positive_phase(x_data=x_data, layer_states=negative_states, beta=this_beta, y_data=y_data, n_steps=n_positive_steps, prop_direction=positive_prop_direction)) if splitstream: negative_states = last( run_negative_phase(x_data=x_data, layer_states=negative_states, n_steps=n_positive_steps, prop_direction=positive_prop_direction)) ws, bs = zip(*((s.params.w_aft, s.params.b) for s in layer_states[1:])) neg_acts, pos_acts = [[ls.potential for ls in later_state] for later_state in (negative_states, positive_states) ] new_ws, new_bs = eqprop_update(negative_acts=neg_acts, positive_acts=pos_acts, ws=ws, bs=bs, learning_rate=learning_rate, beta=this_beta, bidirectional=bidirectional, l2_loss=l2_loss) new_params = _params_vals_to_params(new_ws, new_bs) if renew_activations: assert layer_constructor is not None, 'If you choose renew_activations true, you must provide a layer constructor.' new_states = initialize_states(n_samples=x_data.shape[0], params=new_params, layer_constructor=layer_constructor) else: new_states = [ dataclasses.replace(s, params=p) for s, p in izip_equal(positive_states, new_params) ] return new_states
def run_eqprop_training_update(x_data, y_data, layer_states: Sequence[IDynamicLayer], beta: float, random_flip_beta: bool, learning_rate: float, n_negative_steps: int, n_positive_steps: int, layer_constructor: Optional[Callable[ [int, LayerParams], IDynamicLayer]] = None, bidirectional: bool = True, l2_loss: Optional[float] = None, renew_activations: bool = True, prop_direction=PropDirectionOptions.NEUTRAL, splitstream=False, rng=None, prediction_inp_size=None, delay=None, device='cpu', epoch_check=False, epoch=None, pred=False, batch_size=500, minibatch_size=20, dy_squared=None) -> Sequence[IDynamicLayer]: if isinstance(prop_direction, (list, tuple)): negative_prop_direction, positive_prop_direction = prop_direction else: negative_prop_direction, positive_prop_direction = prop_direction, prop_direction rng = get_rng(rng) this_beta = beta * (torch.randint(2, size=()).float() * 2 - 1) if random_flip_beta else beta ## randomly picked up data indices for the prediction and clamped phase data to update the weights update_data_idx = np.random.choice(batch_size, size=minibatch_size, replace=False) ## these indices are for training LS model to predict the activations train_ls_idx = [k for k in range(batch_size) if k not in update_data_idx] if pred: all_negative_states = run_negative_phase( x_data=x_data, layer_states=layer_states, n_steps=n_negative_steps, prop_direction=negative_prop_direction) negative_activations, negative_target_activations, negative_states, all_potential = get_all_potaintial( all_negative_states, prediction_inp_size, n_negative_steps, layer_states) else: negative_states = last( run_negative_phase(x_data=x_data, layer_states=layer_states, n_steps=n_negative_steps, prop_direction=negative_prop_direction)) #positive_states = last(run_positive_phase(x_data=x_data, layer_states=negative_states, beta=this_beta, delay=delay, y_data=y_data, n_steps=n_positive_steps, prop_direction=positive_prop_direction)) positive_states = last( run_positive_phase(x_data=x_data, layer_states=layer_states, beta=this_beta, delay=delay, y_data=y_data, n_steps=n_positive_steps, prop_direction=positive_prop_direction)) if splitstream: negative_states = last( run_negative_phase(x_data=x_data, layer_states=negative_states, n_steps=n_positive_steps, prop_direction=positive_prop_direction)) ws, bs = zip(*((s.params.w_aft, s.params.b) for s in layer_states[1:])) if pred: _, pos_acts = [[ls.potential for ls in later_state] for later_state in (negative_states, positive_states)] pos_acts = [pos_act[update_data_idx, :] for pos_act in pos_acts] neg_act_layer_1 = negative_activations[1] neg_act_layer_2 = negative_activations[2] negative_activations[1] = neg_act_layer_1[:, :, ::2] negative_activations[2] = neg_act_layer_2[:, :, ::2] #print(np.shape(negative_activations[1])) # linear regression prediction neg_acts = predict_dynamics(negative_activations, negative_target_activations, layer_states, device, update_data_idx, train_ls_idx) #print(neg_acts[1]) # if epoch > 1.0: # np.save('C:/Users/yoshi/work/01_python/01_bioplausible/spiking-eqprop/spiking_eqprop/with_delay19_inp18_pred_spike_ada_lr00030002_b500_m10/negative_states_epoch_' + str(epoch) + '.npy', negative_activations) else: neg_acts, pos_acts = [[ls.potential for ls in later_state] for later_state in (negative_states, positive_states)] new_ws, new_bs, dy_squared = eqprop_update(negative_acts=neg_acts, positive_acts=pos_acts, ws=ws, bs=bs, learning_rate=learning_rate, beta=this_beta, bidirectional=bidirectional, l2_loss=l2_loss, dy_squared=dy_squared) 
new_params = _params_vals_to_params(new_ws, new_bs) if renew_activations: assert layer_constructor is not None, 'If you choose renew_activations true, you must provide a layer constructor.' new_states = initialize_states(n_samples=x_data.shape[0], params=new_params, layer_constructor=layer_constructor) else: new_states = [ dataclasses.replace(s, params=p) for s, p in izip_equal(positive_states, new_params) ] return new_states, dy_squared