def test_bad_layer():
    """Tests that unsupported layers after differ_index fail.

    An ArgMaxLayer is accepted when the activation and value layers are the
    same list (differ_index is then past every layer), but compute() is
    expected to raise NotImplementedError once the two layer lists diverge
    before the ArgMaxLayer.
    """
    # It should work if it's before the differ_index.
    activation_layers = [
        FullyConnectedLayer(np.eye(2), np.ones(shape=(2, ))),
        ReluLayer(),
        FullyConnectedLayer(2.0 * np.eye(2), np.zeros(shape=(2, ))),
        ArgMaxLayer(),
    ]
    value_layers = activation_layers
    network = DDNN(activation_layers, value_layers)
    assert network.differ_index == 4
    output = network.compute([[-2.0, 1.0]])
    assert np.allclose(output, [[1.0]])

    # But not after the differ_index.
    activation_layers = [
        FullyConnectedLayer(np.eye(2), np.ones(shape=(2, ))),
        ReluLayer(),
        FullyConnectedLayer(2.0 * np.eye(2), np.zeros(shape=(2, ))),
        ArgMaxLayer(),
    ]
    value_layers = activation_layers[:2] + [
        FullyConnectedLayer(3.0 * np.eye(2), np.zeros(shape=(2, ))),
        ReluLayer(),
    ]
    network = DDNN(activation_layers, value_layers)
    assert network.differ_index == 2

    # Only the call under test lives in the try body. The failure is raised
    # explicitly (rather than via `assert False`) so the check is not
    # stripped when Python runs with -O.
    try:
        network.compute([[-2.0, 1.0]])
    except NotImplementedError:
        pass
    else:
        raise AssertionError(
            "compute() should raise NotImplementedError when an unsupported "
            "layer follows differ_index")
def patchable_network(cls):
    """Builds the network used in the patching section.

    The network is FullyConnectedLayer -> ReluLayer -> FullyConnectedLayer.
    Each weight matrix is written out row by row and transposed before it is
    handed to its layer.
    """
    first_weights = np.array([[-1., 1.], [1., 0.], [0., 1.]]).T
    first_biases = np.array([-0.5, 0., 0.])

    second_weights = np.array([[1., 1., 1.], [0., -1., -1.]]).T
    second_biases = np.array([0., 1.])

    layers = [
        FullyConnectedLayer(first_weights, first_biases),
        ReluLayer(),
        FullyConnectedLayer(second_weights, second_biases),
    ]
    return Network(layers)
def test_nodiffer():
    """Tests that it works if activation and value layers are identical.

    The same list object is used for both roles, so differ_index is expected
    to equal the number of layers.
    """
    shared_layers = [
        FullyConnectedLayer(np.eye(2), np.ones(2)),
        ReluLayer(),
        FullyConnectedLayer(2.0 * np.eye(2), np.zeros(2)),
        ReluLayer(),
    ]
    network = DDNN(shared_layers, shared_layers)
    assert network.differ_index == 4

    result = network.compute([[-2.0, 1.0]])
    expected = [[0.0, 4.0]]
    assert np.allclose(result, expected)
def test_compute_representatives():
    """Tests that the linear-region endpoints work.

    Both input points are 0.0, but each is paired with a different
    representative (1.0 and -1.0); the expected outputs differ accordingly.
    """
    identity = np.eye(1)
    activation_layers = [
        FullyConnectedLayer(identity, np.zeros(1)),
        ReluLayer(),
    ]
    value_layers = [
        FullyConnectedLayer(np.eye(1), np.ones(1)),
        ReluLayer(),
    ]
    network = DDNN(activation_layers, value_layers)
    assert network.differ_index == 0

    inputs = np.array([[0.0], [0.0]])
    region_points = np.array([[1.0], [-1.0]])
    result = network.compute(inputs, representatives=region_points)
    assert np.array_equal(result, [[1.], [0.]])
def run(self):
    """Fine-tune Squeezenet model and record patched versions.

    Loads the "squeezenet" network, prepends a NormalizeLayer and appends a
    FullyConnectedLayer that selects the outputs of the labels present in
    the training set, then runs FTRepair on growing prefixes of the training
    set. The training data, the pre-patching network, and each run's timing
    and result are stored through self.record_artifact.

    The fine-tuning parameter set and the number of table rows are read
    interactively from stdin.

    Raises:
        ValueError: if the chosen fine-tuning parameter set is not 1 or 2
            (int() also raises ValueError on non-integer input).
    """
    network = self.load_network("squeezenet")
    assert not isinstance(network.layers[-1], ReluLayer)

    # Add a normalize layer to the start to take the images to the
    # Squeezenet format.
    normalize = NormalizeLayer(
        means=np.array([0.485, 0.456, 0.406]),
        standard_deviations=np.array([0.229, 0.224, 0.225]))
    network = Network([normalize] + network.layers)

    # Get the trainset and record it. Labels are re-indexed to their
    # position in the sorted list of distinct labels.
    train_inputs, train_labels = self.get_train(n_labels=9)
    sorted_labels = sorted(set(train_labels))
    train_labels = list(map(sorted_labels.index, train_labels))

    self.record_artifact(train_inputs, "train_inputs", "pickle")
    self.record_artifact(sorted_labels, "sorted_labels", "pickle")
    self.record_artifact(train_labels, "train_labels", "pickle")

    # Add a final layer which maps it into the subset of classes
    # considered.
    final_weights = np.zeros((1000, len(sorted_labels)))
    final_biases = np.zeros(len(sorted_labels))
    for new_label, old_label in enumerate(sorted_labels):
        final_weights[old_label, new_label] = 1.
    final_layer = FullyConnectedLayer(final_weights, final_biases)
    network = Network(network.layers + [final_layer])

    # Record the network before patching.
    self.record_artifact(network, "pre_patching", "network")

    which_params = int(input("Which fine-tuning params? (1 or 2): "))
    # Validate explicitly: an assert would be stripped under -O, after which
    # any other value would silently fall through to the second setting.
    if which_params not in {1, 2}:
        raise ValueError(
            f"Fine-tuning params must be 1 or 2, got {which_params}")
    batch_size = 2 if which_params == 1 else 16

    n_rows = int(
        input("How many rows of Table 1 to generate (1, 2, 3, or 4): "))
    for n_points in [100, 200, 400, 800][:n_rows]:
        print("~~~~", "Points:", n_points, "~~~~")
        key = f"{n_points}_-1"

        patcher = FTRepair(network, train_inputs[:n_points],
                           train_labels[:n_points])
        patcher.lr = 0.0001
        patcher.momentum = 0.0
        # This is just a maximum epoch timeout, it will stop once the
        # constraints are met.
        patcher.epochs = 500
        patcher.batch_size = batch_size

        patched = patcher.compute()

        self.record_artifact(patcher.timing, f"{key}/timing", "pickle")
        # A None result is stored with the "pickle" format instead of the
        # "network" format.
        self.record_artifact(
            patched, f"{key}/patched",
            "network" if patched is not None else "pickle")
def run(self):
    """Repair Squeezenet model and record patched versions.

    Loads the "squeezenet" network, wraps it with a leading NormalizeLayer
    and a trailing label-selecting FullyConnectedLayer, then runs
    ProvableRepair once per patchable layer index for growing prefixes of
    the training set. Inputs, the pre-patching network, and every run's
    timing and result are stored through self.record_artifact.

    The number of table rows to generate is read interactively from stdin.
    """
    base_network = self.load_network("squeezenet")
    assert not isinstance(base_network.layers[-1], ReluLayer)

    # Add a normalize layer to the start to take the images to the
    # Squeezenet format.
    channel_means = np.array([0.485, 0.456, 0.406])
    channel_stds = np.array([0.229, 0.224, 0.225])
    normalize = NormalizeLayer(means=channel_means,
                               standard_deviations=channel_stds)
    network = Network([normalize] + base_network.layers)

    # Get the trainset and record it. Labels are re-indexed to their
    # position in the sorted list of distinct labels.
    train_inputs, raw_labels = self.get_train(n_labels=9)
    sorted_labels = sorted(set(raw_labels))
    train_labels = [sorted_labels.index(label) for label in raw_labels]
    for artifact, artifact_name in (
            (train_inputs, "train_inputs"),
            (sorted_labels, "sorted_labels"),
            (train_labels, "train_labels"),
    ):
        self.record_artifact(artifact, artifact_name, "pickle")

    # Add a final layer which maps it into the subset of classes
    # considered.
    n_classes = len(sorted_labels)
    final_weights = np.zeros((1000, n_classes))
    final_biases = np.zeros(n_classes)
    for new_label, old_label in enumerate(sorted_labels):
        final_weights[old_label, new_label] = 1.
    network = Network(
        network.layers + [FullyConnectedLayer(final_weights, final_biases)])

    # Record the network before patching.
    self.record_artifact(network, "pre_patching", "network")

    # All the layers we can patch (stored as indices into network.layers).
    patchable_indices = []
    for index, candidate in enumerate(network.layers):
        if isinstance(candidate, (FullyConnectedLayer, Conv2DLayer)):
            patchable_indices.append(index)

    n_rows = int(
        input("How many rows of Table 1 to generate (1, 2, 3, or 4): "))
    point_counts = [100, 200, 400, 800][:n_rows]
    for n_points in point_counts:
        print("~~~~", "Points:", n_points, "~~~~")
        for layer_index in patchable_indices:
            print("::::", "Layer:", layer_index, "::::")
            key = f"{n_points}_{layer_index}"

            patcher = ProvableRepair(network, layer_index,
                                     train_inputs[:n_points],
                                     train_labels[:n_points])
            patcher.batch_size = 8
            # The solver time limit grows with the number of points.
            patcher.gurobi_timelimit = (n_points // 10) * 60
            patcher.gurobi_crossover = 0

            patched = patcher.compute()

            # A None result is stored with the "pickle" format instead of
            # the "ddnn" format.
            result_format = "pickle" if patched is None else "ddnn"
            self.record_artifact(patcher.timing, f"{key}/timing", "pickle")
            self.record_artifact(patched, f"{key}/patched", result_format)
def patched_network(cls):
    """Builds the patched network used in the patching section.

    The activation layers are those of cls.patchable_network(). The value
    layers are the same, except that the first layer is replaced by a copy
    whose weight entry [0, 0] is set to 0.0.
    """
    original_layers = cls.patchable_network().layers
    first_layer = original_layers[0]

    # Copy the parameters so that the original first layer is not modified.
    patched_weights = first_layer.weights.numpy().copy()
    patched_biases = first_layer.biases.numpy().copy()
    patched_weights[0, 0] = 0.0

    replacement = FullyConnectedLayer(patched_weights, patched_biases)
    return MaskingNetwork(original_layers,
                          [replacement] + original_layers[1:])
def habitability_network(cls, params=False):
    """Builds the habitability network from the overview.

    With @params left falsy, the result is a Network of
    FullyConnectedLayer -> ReluLayer -> FullyConnectedLayer.

    With @params=True, the raw parameters are returned instead, as the list
    [A1, b1, A2, b2]. That form is used to linearize the network around a
    point in .linearize(), which is in turn used to explicitly state the
    maps in LaTeX.
    """
    # Each weight matrix is written out row by row and then transposed.
    hidden_rows = [[-1.0, 0.25, 1], [+1.0, 0.5, 1], [0, 1, 0], [0.5, 0.5, 2]]
    hidden_weights = np.array(hidden_rows).T
    hidden_biases = np.array([1, -1, -1, -5])

    output_rows = [[-2, 1.0, 1.0, 1], [1.0, 2.0, -1.0, 2]]
    output_weights = np.array(output_rows).T
    output_biases = np.array([1, 0])

    if not params:
        return Network([
            FullyConnectedLayer(hidden_weights, hidden_biases),
            ReluLayer(),
            FullyConnectedLayer(output_weights, output_biases),
        ])
    return [hidden_weights, hidden_biases, output_weights, output_biases]
def test_serialization():
    """Tests that it correctly (de)serializes.

    Checks the serialized activation layers, the serialized value layers
    (compared against the value layers from index 2 onward), the
    differ_index, and a deserialize/serialize round trip.
    """
    activation_layers = [
        FullyConnectedLayer(np.eye(2), np.ones(2)),
        ReluLayer(),
        FullyConnectedLayer(2.0 * np.eye(2), np.zeros(2)),
        ReluLayer(),
    ]
    differing_tail = [
        FullyConnectedLayer(3.0 * np.eye(2), np.zeros(2)),
        ReluLayer(),
    ]
    value_layers = activation_layers[:2] + differing_tail
    network = DDNN(activation_layers, value_layers)

    serialized = network.serialize()

    activation_pairs = zip(serialized.activation_layers, activation_layers)
    for serialized_layer, layer in activation_pairs:
        assert serialized_layer == layer.serialize()

    value_pairs = zip(serialized.value_layers, value_layers[2:])
    for serialized_layer, layer in value_pairs:
        assert serialized_layer == layer.serialize()

    assert serialized.differ_index == 2

    round_tripped = DDNN.deserialize(serialized).serialize()
    assert round_tripped == serialized
def test_compute():
    """Tests that compute() works for a few simple examples.

    Covers three cases: ReLU layers (with both list and torch.Tensor
    inputs), HardTanh layers, and a MaxPool layer. In each case the
    activation and value layers differ in their FullyConnectedLayer
    parameters.
    """
    # Test ReLU, with the first two layers shared between both lists.
    activation_layers = [
        FullyConnectedLayer(np.eye(2), np.ones(shape=(2, ))),
        ReluLayer(),
        FullyConnectedLayer(2.0 * np.eye(2), np.zeros(shape=(2, ))),
        ReluLayer(),
    ]
    value_layers = activation_layers[:2] + [
        FullyConnectedLayer(3.0 * np.eye(2), np.zeros(shape=(2, ))),
        ReluLayer(),
    ]
    network = DDNN(activation_layers, value_layers)
    assert network.differ_index == 2
    output = network.compute([[-2.0, 1.0]])
    assert np.allclose(output, [[0.0, 6.0]])
    output = network.compute(torch.tensor([[-2.0, 1.0]])).numpy()
    assert np.allclose(output, [[0.0, 6.0]])

    # Test HardTanh
    activation_layers = [
        FullyConnectedLayer(np.eye(2), np.ones(shape=(2, ))),
        HardTanhLayer(),
    ]
    value_layers = [
        FullyConnectedLayer(2.0 * np.eye(2), np.zeros(shape=(2, ))),
        HardTanhLayer(),
    ]
    network = DDNN(activation_layers, value_layers)
    output = network.compute([[0.5, -0.9]])
    assert np.allclose(output, [[1.0, -1.8]])

    # Test MaxPool
    width, height, channels = 2, 2, 2
    window_data = StridedWindowData((height, width, channels),
                                    (2, 2), (2, 2), (0, 0), channels)
    maxpool_layer = MaxPoolLayer(window_data)
    activation_layers = [
        FullyConnectedLayer(np.eye(8), np.ones(shape=(8, ))),
        maxpool_layer,
    ]
    value_layers = [
        FullyConnectedLayer(-1. * np.eye(8), np.zeros(shape=(8, ))),
        maxpool_layer,
    ]
    network = DDNN(activation_layers, value_layers)
    output = network.compute([[1.0, 2.0, -1.0, -2.5, 0.0, 0.5, 1.5, -3.5]])
    # NHWC, so the two channels are: [1, -1, 0, 1.5] and [2, -2.5, 0.5, -3.5]
    # So the maxes are 1.5 and 2.0, so the value layer outputs -1.5, -2.0
    assert np.allclose(output, [[-1.5, -2.0]])