def test_compute_and_gradients():
    """Tests the Network's compute and compute_gradients methods."""
    batch = np.random.randint(1, 128)
    input_dims = np.random.randint(1, 256)
    output_dims = np.random.randint(1, 512)

    inputs = np.random.uniform(size=(batch, input_dims))
    weights = np.random.uniform(size=(input_dims, output_dims))
    biases = np.random.uniform(size=(output_dims,))

    fullyconnected_layer = FullyConnectedLayer(weights, biases)
    relu_layer = ReluLayer()

    fullyconnected_outputs = fullyconnected_layer.compute(inputs)
    relu_outputs = relu_layer.compute(fullyconnected_outputs)

    network = Network([fullyconnected_layer, relu_layer])
    network_outputs = network.compute(inputs)

    assert np.allclose(network_outputs, relu_outputs)
    assert np.allclose(network_outputs, network.compute(list(inputs)))
    assert np.allclose(network_outputs[0], network.compute(list(inputs)[0]))

    for label in range(output_dims):
        gradients = network.compute_gradients(inputs, label)
        for i in range(batch):
            if fullyconnected_outputs[i, label] <= 0.0:
                assert np.allclose(gradients[i], 0.0)
            else:
                assert np.allclose(gradients[i], weights[:, label])
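# A minimal numpy-only sketch (independent of the Network class above) showing
# why the gradient assertions in the test hold for f(x) = relu(x @ weights + biases):
# the derivative of output coordinate `label` with respect to x is weights[:, label]
# when the pre-activation is positive and zero when it is negative. The names below
# are illustrative only, not part of the library under test.
import numpy as np

def manual_gradient(x, weights, biases, label):
    """Closed-form gradient of relu(x @ weights + biases)[label] w.r.t. x."""
    pre_activation = x @ weights + biases
    if pre_activation[label] <= 0.0:
        return np.zeros(weights.shape[0])
    return weights[:, label]

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    x = rng.uniform(size=4)
    weights = rng.uniform(size=(4, 3))
    biases = rng.uniform(size=3) - 0.5
    label, epsilon = 1, 1e-6
    # Central finite differences as an independent numerical check.
    numeric = np.array([
        (np.maximum((x + epsilon * np.eye(4)[i]) @ weights + biases, 0.0)[label]
         - np.maximum((x - epsilon * np.eye(4)[i]) @ weights + biases, 0.0)[label])
        / (2.0 * epsilon)
        for i in range(4)])
    assert np.allclose(numeric, manual_gradient(x, weights, biases, label), atol=1e-4)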
def compute(self, inputs, representatives=None):
    """Computes the output of the Decoupled Network on @inputs.

    @inputs should be a Numpy array of inputs.
    """
    differ_index = self.differ_index
    if representatives is not None:
        differ_index = 0
    # Up to differ_index, the values and activation vectors are the same.
    pre_network = Network(self.activation_layers[:differ_index])
    mid_inputs = pre_network.compute(inputs)
    # Now we have to actually separately handle the masking when
    # activations != values.
    activation_vector = mid_inputs
    if representatives is not None:
        activation_vector = pre_network.compute(representatives)
    value_vector = mid_inputs
    for layer_index in range(differ_index, self.n_layers):
        activation_layer = self.activation_layers[layer_index]
        value_layer = self.value_layers[layer_index]
        if isinstance(activation_layer, LINEAR_LAYERS):
            if isinstance(activation_layer, ConcatLayer):
                assert not any(
                    isinstance(input_layer, ConcatLayer)
                    for input_layer in activation_layer.input_layers)
                assert all(
                    isinstance(input_layer, LINEAR_LAYERS)
                    for input_layer in activation_layer.input_layers)
            activation_vector = activation_layer.compute(activation_vector)
            value_vector = value_layer.compute(value_vector)
        elif isinstance(activation_layer, ReluLayer):
            mask = np.maximum(np.sign(activation_vector), 0.0)
            if isinstance(value_vector, np.ndarray):
                value_vector *= mask
            else:
                # NOTE: Originally this was torch.tensor(mask,
                # dtype=torch.float); it was changed to silence a warning
                # from Pytorch. A performance regression seems unlikely, but
                # it may be worth testing for one.
                value_vector *= mask.clone().detach().float()
            activation_vector *= mask
        elif isinstance(activation_layer, HardTanhLayer):
            value_vector[activation_vector >= 1.0] = 1.0
            value_vector[activation_vector <= -1.0] = -1.0
            np.clip(activation_vector, -1.0, 1.0, out=activation_vector)
        elif isinstance(activation_layer, MaxPoolLayer):
            activation_vector, indices = activation_layer.compute(
                activation_vector, return_indices=True)
            value_vector = value_layer.from_indices(value_vector, indices)
        else:
            raise NotImplementedError
    return value_vector
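# A minimal numpy-only sketch of the decoupling idea above (illustrative names,
# not the class's real API): when representatives are supplied, the ReLU on/off
# mask is taken from the representatives' forward pass while the values flow
# from the actual inputs, so the value path is linear inside the representative's
# activation region.
import numpy as np

def decoupled_relu_forward(inputs, representatives, weights, biases):
    """Single fully-connected + ReLU layer with decoupled activations/values."""
    activation_vector = representatives @ weights + biases
    value_vector = inputs @ weights + biases
    mask = np.maximum(np.sign(activation_vector), 0.0)
    return value_vector * mask

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    weights = rng.normal(size=(3, 2))
    biases = rng.normal(size=2)
    x = rng.normal(size=(1, 3))
    # With the input as its own representative, this is a plain forward pass.
    plain = np.maximum(x @ weights + biases, 0.0)
    assert np.allclose(decoupled_relu_forward(x, x, weights, biases), plain)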
def compute(self, inputs):
    """Computes the output of the Masking Network on @inputs.

    @inputs should be a Numpy array of inputs.
    """
    # Up to differ_index, the values and activation vectors are the same.
    pre_network = Network(self.activation_layers[:self.differ_index])
    mid_inputs = pre_network.compute(inputs)
    # Now we have to actually separately handle the masking when
    # activations != values.
    activation_vector = mid_inputs
    value_vector = mid_inputs
    for layer_index in range(self.differ_index, self.n_layers):
        activation_layer = self.activation_layers[layer_index]
        value_layer = self.value_layers[layer_index]
        if isinstance(activation_layer, FullyConnectedLayer):
            activation_vector = activation_layer.compute(activation_vector)
            value_vector = value_layer.compute(value_vector)
        elif isinstance(activation_layer, ReluLayer):
            mask = np.maximum(np.sign(activation_vector), 0.0)
            value_vector *= mask
            activation_vector *= mask
        else:
            raise NotImplementedError
    return value_vector
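# A small numpy-only check of the mask identity used in both compute methods
# above: multiplying by max(sign(x), 0) reproduces ReLU(x) when the activation
# and value vectors coincide, and simply reuses the activation pattern once
# they differ. Illustrative only; no dependence on the MaskingNetwork class.
import numpy as np

x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
mask = np.maximum(np.sign(x), 0.0)
assert np.allclose(mask * x, np.maximum(x, 0.0))  # Mask == ReLU on the same vector.

values = np.ones(5)
# With a different value vector, the mask keeps only the coordinates where the
# *activation* vector was positive: [0, 0, 0, 1, 1].
assert np.allclose(mask * values, np.array([0.0, 0.0, 0.0, 1.0, 1.0]))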