def train_regulized_network(mnist, l1_weight=None, l2_weight=None, keep_prob=1.0, file_name=None):
    """Retrain a stored network with optional regularization and save it back.

    The parameters are read from ``file_name`` with
    ``load_network_parameters``, trained for 8001 mini-batch steps with
    Adam, and the resulting parameters are written to the same
    ``file_name`` (the file is overwritten).

    Args:
        mnist: Data set object; this function uses ``train.next_batch``,
            ``validation.images`` / ``validation.labels`` and
            ``test.images`` / ``test.labels``.
        l1_weight: Forwarded to ``nn.add_regulizer`` as the first argument.
        l2_weight: Forwarded to ``nn.add_regulizer`` as the second argument.
        keep_prob: Forwarded to every ``nn.train`` call (presumably the
            dropout keep probability -- confirm against the model class).
        file_name: Parameter file to load from and save to. When ``None``,
            ``result_folder + 'original_model.npz'`` is used.

    Returns:
        Tuple ``(weights, biases, layer_types)`` as reported by the trained
        model.
    """
    total_steps = 8001
    report_every = 500
    samples_per_batch = 100
    nonzero_threshold = 1e-6

    if file_name is None:
        file_name = result_folder + 'original_model.npz'

    print('=' * 60)
    print('l1 weight=', l1_weight, 'l2 weight=', l2_weight, 'keep prob.=', keep_prob)

    # Build the model from the stored parameters and attach the regularizer.
    start_weights, start_biases, layer_types = load_network_parameters(file_name)
    model = BasicLenetModel()
    model.create_network(start_weights, start_biases, layer_types)
    model.add_regulizer(l1_weight, l2_weight)
    model.create_optimizer(training_algorithm='Adam', learning_rate=0.001,
                           decay_rate=0.98, decay_step=500)
    model.create_initializer()
    model.initialize()

    for step in range(total_steps):
        batch_x, batch_y = mnist.train.next_batch(samples_per_batch)
        model.train(batch_x, batch_y, keep_prob)

        # Report validation accuracy once every `report_every` steps.
        checkpoint, remainder = divmod(step, report_every)
        if remainder == 0:
            val_acc = model.compute_accuracy(mnist.validation.images,
                                             mnist.validation.labels)
            print('{0:2d}: learning rate={1:5.4f}, accuracy={2:2.3f} '.format(
                checkpoint, model.learning_rate(), val_acc))

    weights, biases = model.get_weights()
    layer_types = model.get_layer_types()

    # Per-layer count of entries whose magnitude exceeds the threshold.
    nonzero_counts = []
    for layer_weights in weights:
        nonzero_counts.append(
            np.count_nonzero(np.abs(layer_weights) > nonzero_threshold))

    test_acc = model.compute_accuracy(mnist.test.images, mnist.test.labels)
    counts_text = ', '.join('{:.0f}'.format(count) for count in nonzero_counts)
    summary = (' accuracy={0:4.2f}'.format(test_acc * 100)
               + ', number of non-zero elements: '
               + counts_text)
    print(summary)

    np.savez_compressed(file_name, w=weights, b=biases, type=layer_types)
    return weights, biases, layer_types
def parallel_nettrim(mnist, epsilon_gain, original_weights, original_biases, layer_types):
    """Prune the non-convolutional layers of a network with the Net-Trim solver.

    A model is built from the given parameters, the forward signals of all
    training samples are computed once from that unpruned model, and every
    layer whose type is not ``'conv'`` is then re-fitted independently by
    ``nt_tf.NetTrimSolver`` using those signals.

    Args:
        mnist: Data set object; this function uses ``train.images`` (for
            the sample count) and ``train.next_batch``.
        epsilon_gain: Scale factor; the solver tolerance for a layer is
            ``epsilon_gain * ||Y||`` where ``Y`` is that layer's output
            signal.
        original_weights: Weights handed to ``nn.create_network``.
        original_biases: Biases handed to ``nn.create_network``.
        layer_types: Layer types handed to ``nn.create_network``; the
            per-layer decisions below use the types reported back by the
            model (``nn.get_layer_types()``).

    Returns:
        Tuple ``(pruned_weights, pruned_biases)``: deep copies of the
        model's weights/biases in which the entries of every pruned layer
        are replaced by the solver result. Convolutional layers are left
        unchanged.
    """
    nn = BasicLenetModel()
    nn.create_network(original_weights, original_biases, layer_types)
    nn.create_initializer()
    nn.initialize()

    # use all training samples
    num_samples = mnist.train.images.shape[0]
    samples_x, _ = mnist.train.next_batch(num_samples)

    orig_Weights, orig_Biases = nn.get_weights()
    layer_types = nn.get_layer_types()
    signals = nn.get_fw_signals(samples_x)
    num_layers = len(orig_Weights)

    # pruning algorithm on all layers
    unroll_number = 200
    num_iterations = 10
    nt = nt_tf.NetTrimSolver(unroll_number=unroll_number)

    pruned_weights = copy.deepcopy(orig_Weights)
    pruned_biases = copy.deepcopy(orig_Biases)

    for layer in range(num_layers):
        print(' Pruning layer ', layer)
        if layer_types[layer] == 'conv':
            print('Convolutional layer: skipping.')
            continue

        is_last_layer = layer == num_layers - 1

        # Append a row of ones to the (transposed) input signal so that the
        # bias is estimated as the last row of the solved matrix.
        X = np.concatenate(
            [signals[layer].transpose(), np.ones((1, num_samples))])
        Y = signals[layer + 1].transpose()

        if not is_last_layer:
            # ReLU layer, use net-trim
            V = np.zeros(Y.shape)
        else:
            # use sparse least-squares (for softmax, ignore the activation function)
            V = None

        norm_Y = np.linalg.norm(Y)
        epsilon = epsilon_gain * norm_Y

        start = time.time()
        W_nt = nt.run(X, Y, V, epsilon, rho=100, num_iterations=num_iterations)
        elapsed = time.time() - start
        print('Elapsed time: {0:5.3f}'.format(elapsed))

        # Reconstruct the layer output with the pruned parameters to report
        # how far it is from the original signal.
        Y_nt = np.matmul(W_nt.transpose(), X)
        if not is_last_layer:
            Y_nt = np.maximum(Y_nt, 0)
        rec_error = np.linalg.norm(Y - Y_nt)

        # Count by magnitude: a plain `W_nt > 1e-6` would ignore every
        # significant negative weight and under-report the non-zeros.
        nz_count = np.count_nonzero(np.abs(W_nt) > 1e-6)
        print('non-zeros= {0}, epsilon= {1:.3f}, rec. error= {2:.3f}'.format(
            nz_count, epsilon, rec_error))

        # Split the augmented solution back into weights and bias row.
        pruned_weights[layer] = W_nt[:-1, :]
        pruned_biases[layer] = W_nt[-1, :]

    return pruned_weights, pruned_biases
x, y = mnist.train.next_batch(batch_size) nn.train(x, y, keep_prob) if k % 500 == 0: acc = nn.compute_accuracy(mnist.validation.images, mnist.validation.labels) print('{0:2d}: learning rate={1:5.4f}, accuracy={2:2.3f} '.format(k // 500, nn.learning_rate(), acc)) org_acc = nn.compute_accuracy(mnist.test.images, mnist.test.labels) # # Net-Trim: # change num_samples to a number, say 10000, if you want the Net-Trim retraining with only that many samples num_samples = mnist.train.images.shape[0] samples_x, _ = mnist.train.next_batch(num_samples) orig_Weights, orig_Biases = nn.get_weights() signals = nn.get_fw_signals(samples_x) # num_layers = len(orig_Weights) # # pruning algorithm on all layers nt = nt_tf.NetTrimSolver(unroll_number=unroll_number) layer_types = nn.get_layer_types() pruned_weights = copy.deepcopy(orig_Weights) pruned_biases = copy.deepcopy(orig_Biases)