def test_progress_indicator():
    n_iter = 100
    pi = ProgressIndicator(n_iter, update_every='1s')
    start = time.time()
    for i in range(n_iter):
        time.sleep(0.001)
        if i % 10 == 0:
            with pi.pause_measurement():
                time.sleep(0.02)
    # The paused intervals should not count towards the indicator's elapsed time, so its
    # clock should read well under half of the wall-clock time.
    assert pi.get_elapsed() < (time.time() - start) / 2.
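# Example usage (a minimal sketch): the pattern exercised by the test above -- work done
# inside pause_measurement() is excluded from the indicator's time/rate estimates.
# do_training_step and plot_diagnostics are hypothetical placeholders.
#
#     pi = ProgressIndicator(1000, update_every='1s')
#     for i in range(1000):
#         do_training_step()          # counted towards the estimated rate
#         with pi.pause_measurement():
#             plot_diagnostics()      # excluded from the estimate
#         pi.print_update()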
def temporalize(x, smoothing_steps, distance='L1'):
    """
    :param x: An (n_samples, n_dims) dataset
    :param smoothing_steps: Number of upcoming samples to consider as candidates for the next index.
    :param distance: 'L1' or 'L2': the distance measure used to pick the nearest candidate.
    :return: A (n_samples, ) array of indexes that can be used to shuffle the input for temporal smoothness.
    """
    x_flat = x.reshape(x.shape[0], -1)
    index_buffer = np.arange(1, smoothing_steps + 1)
    next_sample_buffer = x_flat[1:smoothing_steps + 1].copy()
    # Technically, we could do this without a next_sample_buffer (and only an index_buffer), but it would require
    # repeatedly accessing a bunch of really scattered memory, so we do it this way.
    shuffling_indices = np.zeros(len(x), dtype=int)
    rectifier = np.abs if distance == 'L1' else np.square if distance == 'L2' else bad_value(distance)
    p = ProgressIndicator(len(x))
    current_index = 0
    for i in range(len(x)):
        shuffling_indices[i] = current_index
        closest = np.argmin(rectifier(x_flat[current_index] - next_sample_buffer).sum(axis=1))
        current_index = index_buffer[closest]
        more_samples_remain = i + smoothing_steps + 1 < len(x)
        next_index = i + smoothing_steps + 1
        next_sample_buffer[closest] = x_flat[next_index] if more_samples_remain else float('inf')
        index_buffer[closest] = next_index if more_samples_remain else -1
        p(i)
    return shuffling_indices
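# Example usage (a minimal sketch): reorder a toy dataset for temporal smoothness.
# The random data here is purely illustrative.
#
#     x = np.random.randn(1000, 20)      # (n_samples, n_dims)
#     ixs = temporalize(x, smoothing_steps=10)
#     x_smooth = x[ixs]                  # reordered so consecutive samples are similar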
def do_test(test_subset_generators, f_predict, loss_dict, n_test_iters, collapse_loss='mean', in_test_callback=None):
    if callable(loss_dict):  # Allow a single loss function to be passed in place of a dict.
        loss_dict = dict(loss=loss_dict)
    these_test_results = Duck()
    pi = ProgressIndicator(n_test_iters, "Testing")
    for subset_name, subset_generator in test_subset_generators.items():
        start_time = time.time()
        n_tests = 0
        for inputs, targets in subset_generator:
            n_tests += 1
            outputs = f_predict(inputs)
            for loss_name, f_loss in loss_dict.items():
                these_test_results[subset_name, Keys.LOSSES, loss_name, next] = f_loss(outputs, targets)
            pi.print_update()
            if in_test_callback is not None:
                in_test_callback(inputs=inputs, targets=targets, outputs=outputs)
        assert n_tests > 0, "It appears that subset '{}' had no tests!".format(subset_name)
        these_test_results[subset_name, Keys.N_TESTS] = n_tests
        if collapse_loss is not None:
            collapse_func = {'mean': np.mean}[collapse_loss]
            for loss_name in loss_dict:
                these_test_results[subset_name, Keys.LOSSES, loss_name] = \
                    collapse_func(these_test_results[subset_name, Keys.LOSSES, loss_name])
        these_test_results[subset_name, Keys.TIME] = time.time() - start_time
    return these_test_results
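# Example usage (a minimal sketch with made-up data -- the generator, predictor and loss
# below are placeholders, not part of this module):
#
#     def make_test_gen():
#         return ((np.random.randn(4, 8), np.random.randn(4)) for _ in range(10))
#
#     results = do_test(
#         test_subset_generators={'test': make_test_gen()},
#         f_predict=lambda x: x.mean(axis=1),
#         loss_dict=lambda outputs, targets: float(np.mean((outputs - targets)**2)),
#         n_test_iters=10,
#     )
#     print(results['test', Keys.LOSSES, 'loss'])   # Mean loss over the 10 test iterations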
def demo_optimize_conv_scales(n_epochs=5, comp_weight=1e-11, learning_rate=0.1, error_loss='KL',
                              use_softmax=True, optimizer='sgd', shuffle_training=False):
    """
    Run the scale optimization routine on a convnet.

    :param n_epochs: Number of passes through the training videos.
    :param comp_weight: Weight of the computational-cost term in the loss.
    :param learning_rate: Learning rate for the scale optimizer.
    :param error_loss: Loss between the approximate and true network outputs (e.g. 'KL').
    :param use_softmax: If True, use the network up to the softmax ('prob') layer, else up to 'fc8'.
    :param optimizer: Name of the optimizer to use (e.g. 'sgd').
    :param shuffle_training: If True, shuffle the order of the training frames.
    :return: A dict of results from the final test checkpoint.
    """
    if error_loss == 'KL' and not use_softmax:
        raise Exception("It's very strange that you want to use a KL divergence on something other "
                        "than a softmax error.  I assume you've made a mistake.")

    training_videos, training_vgg_inputs = get_vgg_video_splice(
        ['ILSVRC2015_train_00033010', 'ILSVRC2015_train_00336001'],
        shuffle=shuffle_training, shuffling_rng=1234)
    test_videos, test_vgg_inputs = get_vgg_video_splice(
        ['ILSVRC2015_train_00033009', 'ILSVRC2015_train_00033007'])

    set_dbplot_figure_size(12, 6)
    n_frames_to_show = 10
    display_frames = np.arange(len(test_videos) // n_frames_to_show // 2, len(test_videos),
                               len(test_videos) // n_frames_to_show)
    ax1 = dbplot(np.concatenate(test_videos[display_frames], axis=1), "Test Videos", title='', plot_type='pic')
    plt.subplots_adjust(wspace=0, hspace=.05)
    ax1.set_xticks(224 * np.arange(len(display_frames) // 2) * 2 + 224 // 2)
    ax1.tick_params(labelbottom='on')

    layers = get_vgg_layer_specifiers(up_to_layer='prob' if use_softmax else 'fc8')

    # Set up the true VGGNet and get its outputs on the test videos.
    f_true = ConvNet.from_init(layers, input_shape=(3, 224, 224)).compile()
    true_test_out = flatten2(np.concatenate(
        [f_true(frame_positions[None]) for frame_positions in test_vgg_inputs]))
    top5_true_guesses = argtopk(true_test_out, 5)
    true_guesses = np.argmax(true_test_out, axis=1)
    true_labels = [get_vgg_label_at(g, short=True) for g in true_guesses[display_frames[::2]]]
    full_convnet_cost = np.array([get_full_convnet_computational_cost(
        layer_specs=layers, input_shape=(3, 224, 224))] * len(test_videos))

    # Set up the approximate networks.
    slrc_net = ScaleLearningRoundingConvnet.from_convnet_specs(
        layers,
        optimizer=get_named_optimizer(optimizer, learning_rate=learning_rate),
        corruption_type='rand',
        rng=1234)
    f_train_slrc = slrc_net.train_scales.partial(comp_weight=comp_weight, error_loss=error_loss).compile()
    f_get_scales = slrc_net.get_scales.compile()
    round_fp = RoundConvNetForwardPass(layers)
    sigmadelta_fp = SigmaDeltaConvNetForwardPass(layers, input_shape=(3, 224, 224))
    p = ProgressIndicator(n_epochs * len(training_videos))

    output_dir = make_dir(get_local_path('output/%T-convnet-spikes'))

    for input_minibatch, minibatch_info in minibatch_iterate_info(
            training_vgg_inputs, n_epochs=n_epochs, minibatch_size=1,
            test_epochs=np.arange(0, n_epochs, 0.1)):
        if minibatch_info.test_now:
            with EZProfiler('test'):
                current_scales = f_get_scales()
                round_cost, round_out = round_fp.get_cost_and_output(test_vgg_inputs, scales=current_scales)
                sd_cost, sd_out = sigmadelta_fp.get_cost_and_output(test_vgg_inputs, scales=current_scales)
                round_guesses, round_top1_correct, round_top5_correct = get_and_report_scores(
                    round_cost, round_out, name='Round',
                    true_top_1=true_guesses, true_top_k=top5_true_guesses)
                sd_guesses, sd_top1_correct, sd_top5_correct = get_and_report_scores(
                    sd_cost, sd_out, name='SigmaDelta',
                    true_top_1=true_guesses, true_top_k=top5_true_guesses)
                round_labels = [get_vgg_label_at(g, short=True) for g in round_guesses[display_frames[::2]]]
                ax1.set_xticklabels(['{}\n{}'.format(tg, rg)
                                     for tg, rg in izip_equal(true_labels, round_labels)])
                ax = dbplot(np.array([round_cost / 1e9, sd_cost / 1e9, full_convnet_cost / 1e9]).T,
                            'Computation', plot_type='thick-line', ylabel='GOps', title='',
                            legend=['Round', '$\Sigma\Delta$', 'Original'])
                ax.set_xticklabels([])
                plt.grid()
                dbplot(100 * np.array([cummean(sd_top1_correct), cummean(sd_top5_correct)]).T,
                       "Score",
                       plot_type=lambda: LinePlot(
                           y_bounds=(0, 100),
                           plot_kwargs=[dict(linewidth=3, color='k'),
                                        dict(linewidth=3, color='k', linestyle=':')]),
                       title='',
                       legend=['Round/$\Sigma\Delta$ Top-1', 'Round/$\Sigma\Delta$ Top-5'],
                       ylabel='Cumulative\nPercent Accuracy', xlabel='Frame #', layout='v')
                plt.grid()
                plt.savefig(os.path.join(output_dir, 'epoch-%.3g.pdf' % (minibatch_info.epoch, )))
        f_train_slrc(input_minibatch)
        p()
        print("Epoch {:3.2f}: Scales: {}".format(
            minibatch_info.epoch, ['%.3g' % float(s) for s in f_get_scales()]))

    results = dict(
        current_scales=current_scales, round_cost=round_cost, round_out=round_out,
        sd_cost=sd_cost, sd_out=sd_out,
        round_guesses=round_guesses, round_top1_correct=round_top1_correct,
        round_top5_correct=round_top5_correct, sd_guesses=sd_guesses,
        sd_top1_correct=sd_top1_correct, sd_top5_correct=sd_top5_correct)
    dbplot_hang()
    return results
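# Example usage (a minimal sketch): run the scale-optimization demo and inspect the learned
# scales from the final test checkpoint.  Per-epoch figures are saved under the output
# directory created by get_local_path('output/%T-convnet-spikes').
#
#     results = demo_optimize_conv_scales(n_epochs=5, comp_weight=1e-11, learning_rate=0.1)
#     print(results['current_scales'])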
def train_and_test_predictor(
        f_train,
        f_predict,
        losses,
        training_data_gen,
        test_data_gen_constructors,
        n_training_iters=None,
        n_test_iters=None,
        test_checkpoints=('lin', 1000),
        collapse_loss='mean',
        progress_update_period='5s',
        in_test_callback=None,
        post_test_callback=None,
        post_train_callback=None,
        save_train_return=False,
        measures=None,
        iterations_to_end=False,
        ):
    """
    Train a predictor, periodically testing it on the given test subsets.

    :param f_train: A function(inputs, targets) which updates the model, optionally returning training info.
    :param f_predict: A function(inputs) -> outputs.
    :param losses: A loss function f_loss(outputs, targets), or a dict of loss_name -> loss function.
    :param training_data_gen: A generator yielding (inputs, targets) training minibatches.
    :param test_data_gen_constructors: A dict of subset_name -> constructor, where calling the constructor
        returns a fresh generator of (inputs, targets) test minibatches.
    :param n_training_iters: Expected number of training iterations (used for progress reporting only).
    :param n_test_iters: Expected number of test iterations (used for progress reporting only).
    :param test_checkpoints: A checkpoint specifier (e.g. ('lin', 1000)) or a Checkpoints object, saying when to test.
    :param collapse_loss: How to collapse the per-iteration test losses (currently only 'mean'), or None to keep them all.
    :param progress_update_period: How often to print progress updates.
    :param in_test_callback: Optional callback called on each test iteration.
    :param post_test_callback: Optional callback called after each test; a non-None return value is stored in the measures.
    :param post_train_callback: Optional callback called after each training iteration; a non-None return value is stored.
    :param save_train_return: If True, save the return value of f_train in the measures.
    :param measures: Optionally, an existing Duck of measures to append to.
    :param iterations_to_end: See :return:
    :return: A generator which, at each test checkpoint, yields the measures so far.
        If iterations_to_end is False, this is a structure with fields:
            'training'                      Results recorded during training callbacks
                training_iter               The iteration of the training callback
            'testing'                       Results recorded during tests
                test_iter                   The index of the test
                    'iter'                  The number of training iterations finished at the time that the test is run
                    'time'                  The time at which the test is run
                    'samples'               The number of samples seen so far
                    'results'               A structure containing results
                        subset_name         The name of the testing subset, e.g. 'train', 'test'
                                            (you give subset names in test_data_gen_constructors)
                            'losses'
                                loss_name   The name of the loss function (you provide this in losses)
                            n_tests         The number of tests that were run for this subset
                            'time'          The time, in seconds, that it took to test on this subset.
        Otherwise, if True, the same structure but with training_iter and test_iter pushed to the leaf position.
    """
    if measures is None:
        measures = Duck()
    if 'training' not in measures:
        measures[Keys.TRAINING] = Duck()
    if 'testing' not in measures:
        measures[Keys.TESTING] = Duck()
    is_test_time = Checkpoints(test_checkpoints) if not isinstance(test_checkpoints, Checkpoints) else test_checkpoints
    pi = ProgressIndicator(n_training_iters, "Training", update_every=progress_update_period)
    for inputs, targets in training_data_gen:
        if is_test_time():
            this_test_measures = measures[Keys.TESTING].open(next)
            this_test_measures[Keys.ITER] = pi.get_iterations()
            this_test_measures[Keys.TIME] = pi.get_elapsed()
            this_test_measures[Keys.RESULTS] = do_test(
                test_subset_generators={subset_name: constructor() for subset_name, constructor in test_data_gen_constructors.items()},
                f_predict=f_predict,
                loss_dict=losses,
                n_test_iters=n_test_iters,
                collapse_loss=collapse_loss,
                in_test_callback=in_test_callback,
                )
            if post_test_callback is not None:
                return_val = post_test_callback(this_test_measures)
                if return_val is not None:
                    this_test_measures[Keys.CALLBACK, ...] = return_val
            if iterations_to_end:
                measures_to_yield = measures.arrayify_axis(axis=1, subkeys=Keys.TRAINING)
                measures_to_yield = measures_to_yield.arrayify_axis(axis=1, subkeys=Keys.TESTING, inplace=True)
                yield measures_to_yield.to_struct()
            else:
                yield measures
        train_return = f_train(inputs, targets)
        pi.print_update()
        if save_train_return:
            measures[Keys.TRAINING, Keys.RETURNS] = train_return
        if post_train_callback:
            return_val = post_train_callback(inputs=inputs, targets=targets, iter=pi.get_iterations())
            if return_val is not None:
                measures[Keys.TRAINING, Keys.CALLBACK, next, ...] = return_val
def demo_kd_too_large(
        n_steps=20000,
        kp=.01,
        kd=1.,
        kp_scan_range=(.001, .1),
        kd_scan_range=(.1, 10),
        n_k_points=32,
        x_cutoff=0.01,
        w_cutoff=0.002,
        w_fixed=False,
        k_spacing='log',
        seed=1238):
    """
    We have time-varying signals x, w.  See how different choices of kp, kd, and quantization
    affect our ability to approximate the time-varying quantity x*w.
    """
    rng = np.random.RandomState(seed)
    x = lowpass_random(n_samples=n_steps, cutoff=x_cutoff, normalize=True, rng=rng)
    w = lowpass_random(n_samples=n_steps, cutoff=w_cutoff, normalize=True, rng=rng) if not w_fixed else np.ones(n_steps)
    x_w = x * w
    distance_mat_nonquantized = np.zeros((n_k_points, n_k_points))
    distance_mat_quantized = np.zeros((n_k_points, n_k_points))
    distance_mat_recon = np.zeros((n_k_points, n_k_points))
    n_spikes = np.zeros((n_k_points, n_k_points))
    pi = ProgressIndicator(n_k_points**2)
    kp_values = point_space(kp_scan_range[0], kp_scan_range[1], n_points=n_k_points, spacing=k_spacing)
    kd_values = point_space(kd_scan_range[0], kd_scan_range[1], n_points=n_k_points, spacing=k_spacing)
    for i, kpi in enumerate(kp_values):
        for j, kdj in enumerate(kd_values):
            pi.print_update(i * n_k_points + j)
            x_enc = pid_encode(x, kp=kpi, kd=kdj, quantization=None)
            x_enc_quantized = pid_encode(x, kp=kpi, kd=kdj, quantization='herd')
            x_enc_w = pid_decode(x_enc * w, kp=kpi, kd=kdj)
            x_enc_quantized_w_dec = pid_decode(x_enc_quantized * w, kp=kpi, kd=kdj)
            x_enc_quantized_dec_w = pid_decode(x_enc_quantized, kp=kpi, kd=kdj) * w
            distance_mat_nonquantized[i, j] = cosine_distance(x_w, x_enc_w)
            distance_mat_quantized[i, j] = cosine_distance(x_w, x_enc_quantized_w_dec)
            distance_mat_recon[i, j] = cosine_distance(x_w, x_enc_quantized_dec_w)
            n_spikes[i, j] = np.abs(x_enc_quantized).sum()
    x_enc_quantized = pid_encode(x, kp=kp, kd=kd, quantization='herd')
    x_enc = pid_encode(x, kp=kp, kd=kd, quantization=None)
    xwq = pid_decode(x_enc_quantized * w, kp=kp, kd=kd)
    xwn = pid_decode(x_enc * w, kp=kp, kd=kd)
    return (x, w, x_w, x_enc_quantized, x_enc, xwq, xwn), \
           (distance_mat_nonquantized, distance_mat_quantized, distance_mat_recon, n_spikes), \
           (kp, kd, kd_values, kp_values)
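# Example usage (a minimal sketch): run the kp/kd scan on a short signal and look at one of
# the distance maps.  The plotting is illustrative only.
#
#     signals, distance_mats, k_info = demo_kd_too_large(n_steps=2000, n_k_points=8)
#     x, w, x_w, x_enc_quantized, x_enc, xwq, xwn = signals
#     dist_nonquantized, dist_quantized, dist_recon, n_spikes = distance_mats
#     kp, kd, kd_values, kp_values = k_info
#     plt.imshow(dist_quantized)
#     plt.xlabel('kd index')
#     plt.ylabel('kp index')
#     plt.show()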
def get_mnist_results_with_parameters(weights, biases, scales=None, hidden_activations='relu',
                                      output_activation='softmax', n_samples=None, smoothing_steps=1000):
    """
    Return a data structure showing the error and computation required by the original, rounding,
    and sigma-delta implementations of a network with the given parameters.

    :param weights: A list of weight matrices, one per layer.
    :param biases: A list of bias vectors, one per layer.
    :param scales: Optionally, the layer scales for the discrete implementations.
    :param hidden_activations: Activation function of the hidden layers.
    :param output_activation: Activation function of the output layer.
    :param n_samples: Optionally, limit the number of training/test samples used.
    :param smoothing_steps: Smoothing steps for the temporal-MNIST ordering.
    :return: results: An OrderedDict whose keys are 3-tuples (dataset_name, subset, net_version), where:
            dataset_name is 'mnist' or 'temp_mnist'
            subset is 'train' or 'test'
            net_version is 'td', 'round', or 'truth'
        and whose values are another OrderedDict, with keys
            'MFlops', 'l1_error', 'class_error', ... for discrete nets, and
            'Dense MFlops', 'Sparse MFlops', 'class_error' for "true" nets.
    """
    mnist = get_mnist_dataset(flat=True, n_training_samples=n_samples, n_test_samples=n_samples)
    temp_mnist = get_temporal_mnist_dataset(flat=True, smoothing_steps=smoothing_steps,
                                            n_training_samples=n_samples, n_test_samples=n_samples)
    results = OrderedDict()
    p = ProgressIndicator(2 * 3 * 2)
    for dataset_name, (tr_x, tr_y, ts_x, ts_y) in [('mnist', mnist.xyxy), ('temp_mnist', temp_mnist.xyxy)]:
        for subset, x, y in [('train', tr_x, tr_y), ('test', ts_x, ts_y)]:
            traditional_net_output, dense_flops, sparse_flops = forward_pass_and_cost(
                input_data=x, weights=weights, biases=biases,
                hidden_activations=hidden_activations, output_activations=output_activation)
            assert round(dense_flops) == dense_flops and round(sparse_flops) == sparse_flops, 'Flop counts must be int!'
            class_error = percent_argmax_incorrect(traditional_net_output, y)
            results[dataset_name, subset, 'truth'] = OrderedDict([
                ('Dense MFlops', dense_flops / (1e6 * len(x))),
                ('Sparse MFlops', sparse_flops / (1e6 * len(x))),
                ('class_error', class_error)])
            for net_version in 'td', 'round':
                (comp_cost_adds, comp_cost_multiplyadds), output = tdnet_forward_pass_cost_and_output(
                    inputs=x, weights=weights, biases=biases, scales=scales, version=net_version,
                    hidden_activations=hidden_activations, output_activations=output_activation,
                    quantization_method='herd', computation_calc=('adds', 'multiplyadds'))
                l1_error = np.abs(output - traditional_net_output).sum(axis=1).mean(axis=0)
                class_error = percent_argmax_incorrect(output, y)
                results[dataset_name, subset, net_version] = OrderedDict([
                    ('MFlops', comp_cost_adds / (1e6 * len(x))),
                    ('MFlops-multadd', comp_cost_multiplyadds / (1e6 * len(x))),
                    ('l1_error', l1_error),
                    ('class_error', class_error)])
                p.print_update()
    return results
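# Example usage (a minimal sketch): evaluate randomly-initialized parameters on a subset of
# MNIST.  The 784-256-10 shapes below are illustrative; any consistent list of weight
# matrices and bias vectors will do.
#
#     weights = [np.random.randn(784, 256) * 0.01, np.random.randn(256, 10) * 0.01]
#     biases = [np.zeros(256), np.zeros(10)]
#     results = get_mnist_results_with_parameters(weights, biases, n_samples=1000)
#     print(results['mnist', 'test', 'round']['class_error'])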
def train_online_network_checkpoints(model, dataset, checkpoint_generator=None, test_online=True,
                                     return_output=True, n_tests=0, offline_test_mode=None, is_cuda=False,
                                     online_test_reporter='recent', error_func='mse', batchify=False,
                                     print_every=5):
    """
    :param model: A TrainableStatefulModule
    :param dataset: A 4-tuple of (x_train, y_train, x_test, y_test) where the first axis of each is the
        sample #, or a 2-tuple of (x_train, y_train).
    :param checkpoint_generator: A generator of checkpoint iterations, or a tuple like ('even', interval)
        or ('exp', first, growth) describing one.
    :param n_tests: Number of "splits" in the training set (points at which we run a full offline test).
    :return: A generator which, at each checkpoint, yields the results Duck accumulated so far
        (online errors, offline errors, outputs, and checkpoint times).
    """
    data = numpy_struct_to_torch_struct(dataset, cast_floats='float32')
    if batchify:
        data = [x[:, None] for x in data]
    if len(data) == 4:
        x_train, y_train, x_test, y_test = data
    elif len(data) == 2:
        x_train, y_train = data
        x_test, y_test = [], []
    else:
        raise Exception('Expected data to be (x_train, y_train, x_test, y_test) or (x_train, y_train)')
    assert len(y_train) == len(x_train)
    assert len(x_test) == len(y_test)
    if is_cuda:
        x_train = x_train.cuda()
        y_train = y_train.cuda()
        x_test = x_test.cuda()
        y_test = y_test.cuda()

    if isinstance(checkpoint_generator, tuple):
        distribution = checkpoint_generator[0]
        if distribution == 'even':
            interval, = checkpoint_generator[1:]
            checkpoint_generator = (interval * i for i in itertools.count(1))
        elif distribution == 'exp':
            first, growth = checkpoint_generator[1:]
            checkpoint_generator = (first * i * (1 + growth)**(i - 1) for i in itertools.count(1))
        else:
            raise Exception("Can't make a checkpoint generator {}".format(checkpoint_generator))

    if isinstance(error_func, str):
        error_func = create_loss_function(error_func)

    n_training_samples = len(x_train)
    test_iterations = [int(n_training_samples * i / float(n_tests - 1)) for i in range(0, n_tests)]
    initial_state = model.get_state()
    results = Duck()
    if test_online:
        loss_accumulator = (RunningAverage() if online_test_reporter == 'cum'
                            else RecentRunningAverage() if online_test_reporter == 'recent'
                            else (lambda x: x) if online_test_reporter is None
                            else bad_value(online_test_reporter))
    t_start = time.time()
    next_checkpoint = float('inf') if checkpoint_generator is None else next(checkpoint_generator)
    err = np.nan
    pi = ProgressIndicator(
        n_training_samples + 1, update_every=(print_every, 'seconds'), show_total=True,
        post_info_callback=lambda: 'Iteration {} of {}. Online {} Error: {}'.format(t, len(x_train), online_test_reporter, err))
    for t in range(n_training_samples + 1):
        if offline_test_mode is not None and t in test_iterations:
            training_state = model.get_state()
            model.set_state(initial_state)
            if offline_test_mode == 'full_pass':
                y_train_guess = torch_loop(model, is_cuda, x_train[:t]) if t > 0 else None
                if t < len(x_train) - 1:
                    y_middle_guess = torch_loop(model, is_cuda, x_train[t:])  # (currently unused)
                y_test_guess = torch_loop(model, is_cuda, x_test)
                if is_cuda:
                    # (Fixed: this previously assigned y_test_guess to y_train_guess.)
                    y_train_guess = y_train_guess.cuda() if y_train_guess is not None else None
                    y_test_guess = y_test_guess.cuda()
                # .cpu() is a no-op on CPU tensors, so this covers both the cuda and non-cuda cases:
                train_err = error_func(_flatten_first_2(y_train_guess), _flatten_first_2(y_train[:t])).data.cpu().numpy() \
                    if y_train_guess is not None else np.nan
                test_err = error_func(_flatten_first_2(y_test_guess), _flatten_first_2(y_test)).data.cpu().numpy()
                print('Iteration {} of {}: Training: {:.3g}, Test: {:.3g}'.format(t, len(x_train), train_err, test_err))
                results['offline_errors', next, :] = dict(t=t, train=train_err, test=test_err)
            elif offline_test_mode == 'cold_test':
                y_test_guess = torch_loop(model, is_cuda, x_test)
                y_test_guess = _flatten_first_2(y_test_guess)
                if is_cuda:
                    y_test_guess = y_test_guess.cuda()
                test_err = error_func(y_test_guess, _flatten_first_2(y_test)).data.cpu().numpy()
                print('Iteration {} of {}: Test: {:.3g}'.format(t, len(x_train), test_err))
                results['offline_errors', next, :] = dict(t=t, test=test_err)
            else:
                raise Exception('No test_mode: {}'.format(offline_test_mode))
            model.set_state(training_state)

        if t < n_training_samples:
            out = model.train_it(x_train[t], y_train[t], is_cuda)
            if return_output:
                results['output', next] = out.data.cpu().numpy()[0] if is_cuda else out.data.numpy()[0]
            if test_online:
                this_loss = error_func(out, y_train[t]).item()
                err = loss_accumulator(this_loss)
                results['online_errors', next] = this_loss
                if online_test_reporter is not None:
                    results['smooth_online_errors', online_test_reporter, next] = err
        pi()
        if t >= next_checkpoint or t == n_training_samples:
            results['checkpoints', next, :] = {'iter': t, 'runtime': time.time() - t_start}
            yield results
            next_checkpoint = next(checkpoint_generator)
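# Example usage (a minimal sketch): consume the checkpoint generator.  The model and
# dataset here are placeholders -- any TrainableStatefulModule and (x, y, x, y) arrays of
# matching lengths would do.
#
#     for results in train_online_network_checkpoints(
#             model, (x_train, y_train, x_test, y_test),
#             checkpoint_generator=('even', 1000), n_tests=5,
#             offline_test_mode='cold_test'):
#         print(results['checkpoints', -1, 'iter'], results['checkpoints', -1, 'runtime'])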
def train_and_test_predictor(f_train,
                             f_predict,
                             losses,
                             training_data_gen,
                             test_data_gen_constructors,
                             n_training_iters=None,
                             n_test_iters=None,
                             test_checkpoints=('lin', 1000),
                             collapse_loss='mean',
                             progress_update_period='5s',
                             in_test_callback=None,
                             post_test_callback=None,
                             post_train_callback=None,
                             save_train_return=False,
                             measures=None,
                             iterations_to_end=False):
    """
    Train a predictor, periodically testing it on the given test subsets.

    :param f_train: A function(inputs, targets) which updates the model, optionally returning training info.
    :param f_predict: A function(inputs) -> outputs.
    :param losses: A loss function f_loss(outputs, targets), or a dict of loss_name -> loss function.
    :param training_data_gen: A generator yielding (inputs, targets) training minibatches.
    :param test_data_gen_constructors: A dict of subset_name -> constructor, where calling the constructor
        returns a fresh generator of (inputs, targets) test minibatches.
    :param n_training_iters: Expected number of training iterations (used for progress reporting only).
    :param n_test_iters: Expected number of test iterations (used for progress reporting only).
    :param test_checkpoints: A checkpoint specifier (e.g. ('lin', 1000)) or a Checkpoints object, saying when to test.
    :param collapse_loss: How to collapse the per-iteration test losses (currently only 'mean'), or None to keep them all.
    :param progress_update_period: How often to print progress updates.
    :param in_test_callback: Optional callback called on each test iteration.
    :param post_test_callback: Optional callback called after each test.
    :param post_train_callback: Optional callback called after each training iteration; a non-None return value is stored.
    :param save_train_return: If True, save the return value of f_train in the measures.
    :param measures: Optionally, an existing Duck of measures to append to.
    :param iterations_to_end: See :return:
    :return: A generator which, at each test checkpoint, yields the measures so far.
        If iterations_to_end is False, this is a structure with fields:
            'training'                      Results recorded during training callbacks
                training_iter               The iteration of the training callback
            'testing'                       Results recorded during tests
                test_iter                   The index of the test
                    'iter'                  The number of training iterations finished at the time that the test is run
                    'time'                  The time at which the test is run
                    'samples'               The number of samples seen so far
                    'results'               A structure containing results
                        subset_name         The name of the testing subset, e.g. 'train', 'test'
                                            (you give subset names in test_data_gen_constructors)
                            'losses'
                                loss_name   The name of the loss function (you provide this in losses)
                            n_tests         The number of tests that were run for this subset
                            'time'          The time, in seconds, that it took to test on this subset.
        Otherwise, if True, the same structure but with training_iter and test_iter pushed to the leaf position.
    """
    if measures is None:
        measures = Duck()
    if 'training' not in measures:
        measures['training'] = Duck()
    if 'testing' not in measures:
        measures['testing'] = Duck()
    is_test_time = Checkpoints(test_checkpoints) if not isinstance(test_checkpoints, Checkpoints) else test_checkpoints
    pi = ProgressIndicator(n_training_iters, "Training", update_every=progress_update_period)
    for inputs, targets in training_data_gen:
        if is_test_time():
            this_test_measures = measures['testing'].open(next)
            this_test_measures['iter'] = pi.get_iterations()
            this_test_measures['time'] = pi.get_elapsed()
            this_test_measures['results'] = do_test(
                test_subset_generators={subset_name: constructor() for subset_name, constructor in test_data_gen_constructors.items()},
                f_predict=f_predict,
                loss_dict=losses,
                n_test_iters=n_test_iters,
                collapse_loss=collapse_loss,
                in_test_callback=in_test_callback,
                )
            if post_test_callback is not None:
                post_test_callback(this_test_measures)
            if iterations_to_end:
                measures_to_yield = measures.arrayify_axis(axis=1, subkeys='training')
                measures_to_yield = measures_to_yield.arrayify_axis(axis=1, subkeys='testing', inplace=True)
                yield measures_to_yield.to_struct()
            else:
                yield measures
        train_return = f_train(inputs, targets)
        pi.print_update()
        if save_train_return:
            measures['training', 'returns'] = train_return
        if post_train_callback:
            return_val = post_train_callback(inputs=inputs, targets=targets, iter=pi.get_iterations())
            if return_val is not None:
                measures['training', 'callback', next, ...] = return_val
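# Example usage (a minimal sketch with a toy linear-regression problem -- the generator,
# model state `w`, and training function are placeholders, and the final indexing assumes
# the string keys given in the docstring above):
#
#     from itertools import islice
#
#     w = np.zeros(4)
#
#     def training_gen():
#         while True:
#             x = np.random.randn(10, 4)
#             yield x, x @ np.array([1., 2., 3., 4.])
#
#     def f_train(x, y):
#         grad = x.T @ ((x @ w) - y) / len(x)
#         w[:] = w - 0.1 * grad          # in-place update, so no `global` is needed
#
#     for measures in train_and_test_predictor(
#             f_train=f_train,
#             f_predict=lambda x: x @ w,
#             losses=lambda outputs, targets: float(np.mean((outputs - targets)**2)),
#             training_data_gen=islice(training_gen(), 5000),
#             test_data_gen_constructors={'test': lambda: islice(training_gen(), 100)},
#             n_training_iters=5000,
#             test_checkpoints=('lin', 1000)):
#         print(measures['testing', -1, 'results', 'test', 'losses', 'loss'])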
def experiment_mnist_eqprop_torch(
        layer_constructor: Callable[[int, LayerParams], IDynamicLayer],
        n_epochs=10,
        hidden_sizes=(500, ),
        minibatch_size=10,  # update minibatch size
        batch_size=500,  # total batch size
        beta=.5,
        random_flip_beta=True,
        learning_rate=.05,
        n_negative_steps=120,
        n_positive_steps=80,
        initial_weight_scale=1.,
        online_checkpoints_period=None,
        epoch_checkpoint_period=1.0,  # alternatives: '100s' or {0: .25, 1: .5, 5: 1, 10: 2, 50: 4}
        skip_zero_epoch_test=False,
        n_test_samples=10000,
        prop_direction: Union[str, Tuple] = 'neutral',
        bidirectional=True,
        renew_activations=True,
        do_fast_forward_pass=False,
        rebuild_coders=True,
        l2_loss=None,
        splitstream=False,
        seed=1234,
        prediction_inp_size=17,  # prediction input size
        delay=18,  # delay size for the clamped phase
        pred=True,  # if you want to use the prediction
        check_flg=False,
):
    """
    Replicate the results of Scellier & Bengio:
        Equilibrium Propagation: Bridging the Gap between Energy-Based Models and Backpropagation
        https://www.frontiersin.org/articles/10.3389/fncom.2017.00024/full

    Specifically, the train_model demo here:
        https://github.com/bscellier/Towards-a-Biologically-Plausible-Backprop

    Differences between our code and theirs:
    - We do not keep persistent layer activations tied to data points over epochs.  So our results
      should only really match for the first epoch.
    - We evaluate the training score periodically, rather than as an online average (however you can
      see the online score by setting online_checkpoints_period to something that is not None).
    """
    torch.manual_seed(seed)
    device = 'cuda' if torch.cuda.is_available() and USE_CUDA_WHEN_AVAILABLE else 'cpu'
    if device == 'cuda':
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    print(f'Using Device: {device}')
    print('Params:\n' + '\n'.join(list(f' {k} = {v}' for k, v in locals().items())))
    rng = get_rng(seed)
    n_in = 784
    n_out = 10
    dataset = input_data.read_data_sets('MNIST_data', one_hot=True)
    x_train = torch.tensor(dataset.train.images, dtype=torch.float32).to(device)
    y_train = torch.tensor(dataset.train.labels, dtype=torch.float32).to(device)
    x_test = torch.tensor(dataset.test.images, dtype=torch.float32).to(device)
    y_test = torch.tensor(dataset.test.labels, dtype=torch.float32).to(device)  # Their 'validation set' is our 'test set'
    x_val = torch.tensor(dataset.validation.images, dtype=torch.float32).to(device)
    y_val = torch.tensor(dataset.validation.labels, dtype=torch.float32).to(device)

    if is_test_mode():
        x_train, y_train, x_test, y_test, x_val, y_val = (
            a[:100] for a in (x_train, y_train, x_test, y_test, x_val, y_val))
        n_epochs = 1
        n_negative_steps = 3
        n_positive_steps = 3

    layer_sizes = [n_in] + list(hidden_sizes) + [n_out]
    ra = RunningAverage()
    sp = Speedometer(mode='last')
    is_online_checkpoint = Checkpoints(online_checkpoints_period, skip_first=skip_zero_epoch_test) \
        if online_checkpoints_period is not None else lambda: False
    is_epoch_checkpoint = Checkpoints(epoch_checkpoint_period, skip_first=skip_zero_epoch_test)

    training_states = initialize_states(
        layer_constructor=layer_constructor,
        n_samples=batch_size,
        params=initialize_params(layer_sizes=layer_sizes, initial_weight_scale=initial_weight_scale, rng=rng))

    if isinstance(prop_direction, str):
        fwd_prop_direction, backward_prop_direction = prop_direction, prop_direction
    else:
        fwd_prop_direction, backward_prop_direction = prop_direction

    def do_test():
        test_error, train_error, val_error = [
            percent_argmax_incorrect(
                run_inference(
                    x_data=x[:n_test_samples],
                    states=initialize_states(
                        layer_constructor=layer_constructor,
                        params=[s.params for s in training_states],
                        n_samples=n_samples),
                    n_steps=n_negative_steps,
                    prop_direction=fwd_prop_direction,
                ),
                y[:n_samples]).item()
            for x, y in [(x_test, y_test), (x_train, y_train), (x_val, y_val)]
            for n_samples in [min(len(x), n_test_samples) if n_test_samples is not None else len(x)]
        ]  # Not an actual loop... the inner 'for' is just a hack for assignment in comprehensions.
        print(f'Epoch: {epoch:.3g}, Iter: {i}, Test Error: {test_error:.3g}%, Train Error: {train_error:.3g}, '
              f'Validation Error: {val_error:.3g}, Mean Rate: {sp(i):.3g}iter/s')
        return (dict(iter=i, epoch=epoch, train_error=train_error, test_error=test_error, val_error=val_error),
                train_error, test_error, val_error)

    results = Duck()
    pi = ProgressIndicator(expected_iterations=n_epochs * dataset.train.num_examples / minibatch_size,
                           update_every='10s')
    dy_squared = [None, None]

    for i, (ixs, info) in enumerate(minibatch_index_info_generator(
            n_samples=x_train.size()[0], minibatch_size=batch_size, n_epochs=n_epochs)):
        epoch = i * batch_size / x_train.shape[0]

        if is_epoch_checkpoint(epoch):
            check_flg = False
            x_train, y_train = shuffle_data(x_train, y_train)
            with pi.pause_measurement():
                results[next, :], train_err, test_err, val_err = do_test()

                # Save the parameters and the log.  Note: `directory` is assumed to be
                # defined at module level.
                ws, bs = zip(*((s.params.w_aft, s.params.b) for s in training_states[1:]))
                if os.path.isfile(directory + '/log.txt'):
                    f = open(directory + '/log.txt', 'a')
                else:
                    os.mkdir(directory)
                    f = open(directory + '/log.txt', 'w')
                f.write("Epoch: " + str(epoch) + '\n')
                f.write("accuracy for training: " + str(train_err) + '\n')
                f.write("accuracy for testing: " + str(test_err) + '\n')
                f.write("accuracy for validation: " + str(val_err) + '\n')
                f.close()
                np.save(directory + '/w_epoch_' + str(epoch) + '.npy', ws)
                np.save(directory + '/b_epoch_' + str(epoch) + '.npy', bs)
                np.save(directory + '/dy_squared_epoch_' + str(epoch) + '.npy', dy_squared)

                yield results
                if epoch > 100 and results[-1, 'train_error'] > 50:
                    return

        # The original training loop, just taken out here:
        ixs = ixs.astype(np.int32)  # needed for python version 3.7
        x_data_sample, y_data_sample = x_train[ixs], y_train[ixs]
        training_states, dy_squared = run_eqprop_training_update(
            x_data=x_data_sample, y_data=y_data_sample, layer_states=training_states, beta=beta,
            random_flip_beta=random_flip_beta, learning_rate=learning_rate,
            layer_constructor=layer_constructor, bidirectional=bidirectional, l2_loss=l2_loss,
            renew_activations=renew_activations, n_negative_steps=n_negative_steps,
            n_positive_steps=n_positive_steps, prop_direction=prop_direction,
            splitstream=splitstream, rng=rng, prediction_inp_size=prediction_inp_size,
            delay=delay, device=device, epoch_check=check_flg, epoch=epoch, pred=pred,
            batch_size=batch_size, minibatch_size=minibatch_size, dy_squared=dy_squared)
        check_flg = False

        this_train_score = ra(percent_argmax_incorrect(output_from_state(training_states), y_train[ixs]))
        if is_online_checkpoint():
            print(f'Epoch {epoch:.3g}: Iter {i}: Score {this_train_score:.3g}%: Mean Rate: {sp(i):.2g}')
        pi.print_update(info=f'Epoch: {epoch}')

    results[next, :], train_err, test_err, val_err = do_test()
    yield results
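# Example usage (a minimal sketch): drive the experiment generator.  `my_layer_constructor`
# is a placeholder for whatever (n_samples, LayerParams) -> IDynamicLayer factory you use.
#
#     for results in experiment_mnist_eqprop_torch(
#             layer_constructor=my_layer_constructor, n_epochs=2, hidden_sizes=(500,)):
#         print(results[-1, 'epoch'], results[-1, 'test_error'])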
def demo_quantized_convergence(
        quantized_layer_constructor,
        smooth_epsilon=0.5,
        layer_sizes=(500, 500, 10),
        initialize_acts_randomly=False,
        minibatch_size=1,
        n_steps=10000,
        initial_weight_scale=1.,
        prop_direction='neutral',
        data_seed=1241,
        param_seed=1237,
        hang=True,
        plot=False):
    """
    Compare the convergence of a quantized ("Rough") network with its smooth counterpart: run the
    negative-phase eqprop dynamics under both layer constructors and measure the layerwise
    differences between the two trajectories.
    """
    smooth_layer_constructor = SimpleLayerController.get_partial_constructor(epsilon=smooth_epsilon)
    print('Params:\n' + '\n'.join(list(f' {k} = {v}' for k, v in locals().items())))
    data_rng = get_rng(data_seed)
    param_rng = get_rng(param_seed)
    HISTORY_LEN = n_steps
    N_NEURONS_TO_PLOT = 10
    if is_test_mode():
        n_steps = 10
    pi = ProgressIndicator(update_every='2s', expected_iterations=2 * n_steps)
    n_in, n_out = layer_sizes[0], layer_sizes[-1]
    x_data = data_rng.rand(minibatch_size, n_in)
    params = initialize_params(layer_sizes=layer_sizes, initial_weight_scale=initial_weight_scale, rng=param_rng)

    def run_update(layer_constructor, mode):
        plt.gca().set_prop_cycle(None)
        states = initialize_states(layer_constructor=layer_constructor, n_samples=minibatch_size, params=params)
        for t in range(n_steps):
            states = eqprop_step(layer_states=states, x_data=x_data, beta=0, y_data=None, direction=prop_direction)
            acts = [s.potential for s in states]
            yield acts
            if plot:
                dbplot_collection(
                    [a[0, :N_NEURONS_TO_PLOT] for a in acts], f'{mode} acts', axis='acts',
                    draw_every='5s', cornertext=f'Negative Phase: {t}',
                    plot_type=lambda: MovingPointPlot(
                        buffer_len=HISTORY_LEN,
                        plot_kwargs=dict(linestyle='-.' if mode == 'Smooth' else '-'),
                        reset_color_cycle=True))
            pi()

    smooth_record = list(run_update(layer_constructor=smooth_layer_constructor, mode='Smooth'))
    smooth_acts = smooth_record[-1]
    rough_record = list(run_update(layer_constructor=quantized_layer_constructor, mode='Rough'))
    rough_acts = rough_record[-1]
    # Layerwise mean-absolute differences between the rough and smooth trajectories:
    rs_online_errors = np.array([[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)]
                                 for hs_rough, hs_smooth in zip(rough_record, smooth_record)])
    rs_end_errors = np.array([[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)]
                              for hs_smooth in [smooth_record[-1]] for hs_rough in rough_record])
    rr_end_errors = np.array([[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)]
                              for hs_smooth in [rough_record[-1]] for hs_rough in rough_record])
    ss_end_errors = np.array([[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)]
                              for hs_smooth in [smooth_record[-1]] for hs_rough in smooth_record])
    mean_abs_error = np.mean(rs_online_errors, axis=0)
    final_abs_error = rs_online_errors[-1]
    print(f'Mean Abs Layerwise Errors: {np.array_str(mean_abs_error, precision=5)}\t '
          f'Final Layerwise Errors: {np.array_str(final_abs_error, precision=5)}')
    return rs_online_errors, rs_end_errors, rr_end_errors, ss_end_errors
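# Example usage (a minimal sketch): compare a quantized layer against its smooth counterpart
# and plot the per-layer trajectory differences.
#
#     quantized_constructor = ...   # however you build your quantized IDynamicLayer factory
#     rs_online, rs_end, rr_end, ss_end = demo_quantized_convergence(
#         quantized_constructor, n_steps=1000, plot=False)
#     plt.semilogy(rs_online)
#     plt.xlabel('step')
#     plt.ylabel('mean |rough - smooth|')
#     plt.show()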
def demo_quantized_convergence_perturbed(
        quantized_layer_constructor,
        smooth_epsilon=0.5,
        layer_sizes=(500, 500, 10),
        initialize_acts_randomly=False,
        minibatch_size=20,
        smooth_longer_factor=10,
        n_steps=500,
        change_frac=0.5,
        beta=.5,  # TODO: revert
        initial_weight_scale=1.,
        prop_direction='neutral',
        data_seed=None,
        param_seed=None,
        hang=True,
):
    """
    Like demo_quantized_convergence, but partway through the run we perturb the network by
    clamping the output (switching from the negative phase to the positive phase with the given
    beta), and measure how quickly the quantized network re-converges to the smooth fixed points.
    """
    perturbation_step = int(n_steps * change_frac)
    smooth_layer_constructor = SimpleLayerController.get_partial_constructor(epsilon=smooth_epsilon)
    print('Params:\n' + '\n'.join(list(f' {k} = {v}' for k, v in locals().items())))
    data_rng = get_rng(data_seed)
    param_rng = get_rng(param_seed)
    HISTORY_LEN = n_steps
    N_NEURONS_TO_PLOT = 10
    if is_test_mode():
        n_steps = 10
        perturbation_step = 5
    pi = ProgressIndicator(update_every='2s', expected_iterations=2 * n_steps)
    n_in, n_out = layer_sizes[0], layer_sizes[-1]
    x_data = to_default_tensor(data_rng.rand(minibatch_size, n_in))
    y_data = torch.zeros((minibatch_size, n_out))
    # Note: the target classes are drawn with the global numpy RNG, not data_rng.
    y_data[np.arange(len(y_data)), np.random.choice(n_out, size=minibatch_size)] = 1
    params = initialize_params(layer_sizes=layer_sizes, initial_weight_scale=initial_weight_scale, rng=param_rng)

    def run_update(layer_constructor, mode):
        # Note: PLOT is assumed to be a module-level flag.
        if PLOT:
            plt.gca().set_prop_cycle(None)  # (set_color_cycle was removed in matplotlib 2.x)
        states = initialize_states(layer_constructor=layer_constructor, n_samples=minibatch_size, params=params)
        for t in range(n_steps):
            for _ in range(smooth_longer_factor) if mode == 'Smooth' else range(1):
                if t < perturbation_step:
                    states = eqprop_step(layer_states=states, x_data=x_data, beta=0, y_data=None, direction=prop_direction)
                else:
                    states = eqprop_step(layer_states=states, x_data=x_data, beta=beta, y_data=y_data, direction=prop_direction)
            acts = [s.potential for s in states]
            yield acts[1:]
            if PLOT:
                if do_every('2s'):
                    dbplot([put_vector_in_grid(a[0]) for a in acts], f'acts-{mode}', title=f'{mode} Iter-{t}')
                if mode == 'Rough':
                    dbplot([states[1].stepper.step_size.mean(), states[2].stepper.step_size.mean()],
                           'step size', draw_every='2s')
            pi()
        if PLOT:
            dbplot_redraw_all()

    rough_record = list(run_update(layer_constructor=quantized_layer_constructor, mode='Rough'))
    smooth_record = list(run_update(layer_constructor=smooth_layer_constructor, mode='Smooth'))
    smooth_neg_endpoint = smooth_record[perturbation_step - 1]
    smooth_pos_endpoint = smooth_record[-1]
    smooth_endpoint_delta = np.concatenate(
        [sp - sn for sp, sn in izip_equal(smooth_pos_endpoint, smooth_neg_endpoint)], axis=1)
    rough_endpoint_delta = np.concatenate(
        [sp - sn for sp, sn in izip_equal(rough_record[-1], rough_record[perturbation_step - 1])], axis=1)
    # (n_steps, n_layers) array indicating convergence to the fixed point for each non-input layer:
    # before the perturbation we compare against the smooth negative-phase endpoint, and after it,
    # against the smooth positive-phase endpoint.
    distance_to_converged = np.array(
        [[torch.mean(abs(hr - hs)).item() for hr, hs in zip(hs_rough, smooth_neg_endpoint)]
         for hs_rough in rough_record[:perturbation_step]] +
        [[torch.mean(abs(hr - hs)).item() for hr, hs in zip(hs_rough, smooth_pos_endpoint)]
         for hs_rough in rough_record[perturbation_step:]])

    if PLOT:
        dbplot(distance_to_converged, 'errors',
               plot_type=lambda: LinePlot(x_axis_type='log', y_axis_type='log'),
               legend=[f'Layer {i+1}' for i in range(len(layer_sizes))])
    mean_abs_error = np.mean(distance_to_converged, axis=0)
    final_abs_error = distance_to_converged[-1]
    print(f'Mean Abs Layerwise Errors: {np.array_str(mean_abs_error, precision=5)}\t '
          f'Final Layerwise Errors: {np.array_str(final_abs_error, precision=5)}')
    return distance_to_converged, rough_endpoint_delta, smooth_endpoint_delta
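# Example usage (a minimal sketch): measure re-convergence after the output-clamping
# perturbation, which with these arguments happens at step int(500 * 0.5) = 250.
#
#     quantized_constructor = ...   # as above, a placeholder for your quantized layer factory
#     dist, rough_delta, smooth_delta = demo_quantized_convergence_perturbed(
#         quantized_constructor, n_steps=500, change_frac=0.5)
#     plt.semilogy(dist)
#     plt.axvline(250, linestyle=':')
#     plt.show()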