def __init__(self, input_dim, output_dim, weights=None, bias=None, **kwargs):
    super(FullyConnectedLayer, self).__init__(**kwargs)
    self.input_dim = input_dim
    self.output_dim = output_dim

    weight_shape = (output_dim, input_dim)
    weight_name = None
    if weights is not None:
        if not hasattr(weights, 'vals') or not hasattr(weights, 'diffs'):
            raise ValueError('weights must be a Blob')
        if weights.shape != weight_shape:
            raise ValueError('weights do not have the correct shape')
        self.weights = weights
    else:
        weight_name = '%s.weights' % self.name if self.name is not None else None
        self.weights = Blob(weight_shape, name=weight_name)

    bias_shape = (output_dim, 1)
    if bias is not None:
        if not hasattr(bias, 'vals') or not hasattr(bias, 'diffs'):
            raise ValueError('bias must be a Blob')
        if bias.shape != bias_shape:
            raise ValueError('bias does not have the correct shape')
        self.bias = bias
    else:
        bias_name = '%s.bias' % self.name if self.name is not None else None
        self.bias = Blob(bias_shape, name=bias_name)
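
# Hypothetical usage sketch (not part of the original source): when no
# pre-initialized Blobs are passed, __init__ above allocates zero-filled
# parameter Blobs named '<layer name>.weights' and '<layer name>.bias'.
# The layer name 'fc1' and the printed shapes are illustrative assumptions.
#
#   layer = FullyConnectedLayer(4, 3, name='fc1')
#   print(layer.weights.shape)  # (3, 4)
#   print(layer.bias.shape)     # (3, 1)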
def get_random_layer(self):
    input_dim = random.randint(2, 10)
    output_dim = random.randint(2, 10)
    weights = np.random.randn(output_dim, input_dim)
    weights_blob = Blob((output_dim, input_dim), vals=weights)
    bias = np.random.randn(output_dim, 1)
    bias_blob = Blob((output_dim, 1), vals=bias)
    return FullyConnectedLayer(input_dim, output_dim,
                               weights=weights_blob, bias=bias_blob)
def forward_test(self):
    q = Blob((2, 2), vals=np.array([[1.0, 0.0], [0.0, 1.0]]))
    p = Blob((2, 2), vals=np.array([[0.5, 0.5], [0.5, 0.5]]))
    loss = Blob(())
    layer = CrossEntropyLossLayer(2)
    layer.forward([q, p], [loss])
    # Each column contributes -log(0.5), so the expected loss is -2 * log(0.5).
    # Take the absolute value so a too-small loss cannot pass the test.
    diff = np.abs(loss.vals + 2.0 * np.log(0.5))
    self.assertTrue(diff < 10e-2)
def forward_test(self):
    shape = (2, 3)
    in_data = np.array([[-1, 2, 1], [0, -10, 5]])
    out_data = np.array([[0, 2, 1], [0, 0, 5]])
    bottom_blob = Blob(shape, vals=in_data)
    top_blob = Blob(shape)
    layer = ReLuLayer(shape)
    layer.forward([bottom_blob], [top_blob])
    self.assertTrue(np.all(top_blob.vals == out_data))
def forward_test(self):
    x1 = np.array([[1, 2, 3], [2, 3, 4]], dtype=np.float32).T
    x1_blob = Blob(x1.shape, vals=x1)
    x2 = np.array([[2, 3, 4], [-2, 1, 5]], dtype=np.float32).T
    x2_blob = Blob(x2.shape, vals=x2)
    loss = Blob(())
    expected_loss = 24
    layer = L2LossLayer(3)
    layer.forward([x1_blob, x2_blob], [loss])
    self.assertTrue(np.abs(loss.vals - expected_loss) < 10e-5)
def forward_test(self):
    shape = (2, 3)
    x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
    y = 1.0 / (1.0 + np.exp(-x))
    bottom_blob = Blob(shape, vals=x)
    top_blob = Blob(shape)
    layer = SigmoidLayer(shape)
    layer.forward([bottom_blob], [top_blob])
    diff = np.linalg.norm((y - top_blob.vals)[:])
    self.assertTrue(diff < 10e-2)
def set_vals_test(self):
    shape = (2, 3)
    zero = np.zeros(shape)
    vals = np.array([[1, 2, 3], [4, 5, 6]])
    blob = Blob(shape)
    self.assertTrue(np.all(blob.vals == zero))
    self.assertTrue(np.all(blob.diffs == zero))
    blob.vals = vals
    self.assertTrue(np.all(blob.vals == vals))
    self.assertTrue(np.all(blob.diffs == zero))
def gradient_check(layer, param_name=0, batch_size=None, rand_fn=None):
    """
    Compare numeric gradients with layer-computed gradients for either an
    input blob to the layer or an internal parameter of the layer. Returns
    the Frobenius norm of the difference between the two.

    If param_name is an integer, it indexes the layer's bottom (input)
    blobs; otherwise it is the name of a Blob attribute on the layer.
    """
    if len(layer.get_top_shapes()) > 1:
        raise ValueError(
            'Gradient checking is only implemented for layers with '
            'one output')
    if rand_fn is None:
        rand_fn = lambda s: 10.0 * np.random.standard_normal(s)
    if batch_size is None:
        batch_size = random.randint(2, 10)

    bottom_shapes = layer.get_bottom_shapes()
    bottom_shapes = [s + (batch_size, ) for s in bottom_shapes]
    bottom_blobs = [Blob(s, vals=rand_fn(s)) for s in bottom_shapes]
    for b in bottom_blobs:
        print(b.vals)

    top_shape = layer.get_top_shapes()[0]
    if len(top_shape) > 0:
        top_shape = top_shape + (batch_size, )
    top_diffs = rand_fn(top_shape)
    top_blob = Blob(top_shape, diffs=top_diffs)

    if type(param_name) == int:
        blob = bottom_blobs[param_name]
    else:
        blob = getattr(layer, param_name)

    # This should only modify top_blob.vals
    layer.forward(bottom_blobs, [top_blob])
    # This should only modify blob.diffs and bottom_blob.diffs
    layer.backward(bottom_blobs, [top_blob])
    layer_diffs = blob.diffs.copy()

    numeric_derivative(layer, blob, bottom_blobs, top_blob)
    numeric_diffs = blob.diffs.copy()

    diff = layer_diffs - numeric_diffs
    # diff = np.max(np.abs(diff))
    diff = np.linalg.norm(diff[:])
    return diff
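
# Hypothetical usage sketch of gradient_check (not taken from the original
# tests). It assumes FullyConnectedLayer implements get_bottom_shapes() and
# get_top_shapes() as gradient_check requires, and that a layer name is
# accepted via **kwargs; the layer dimensions and batch size are arbitrary.
#
#   layer = FullyConnectedLayer(3, 2, name='fc')
#   # Difference between analytic and numeric d(loss)/d(weights).
#   print(gradient_check(layer, param_name='weights', batch_size=4))
#   # Difference for the first (and only) bottom blob, i.e. the layer input.
#   print(gradient_check(layer, param_name=0, batch_size=4))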
def init_diffs_test(self):
    shape = (3, 2)
    diffs = np.array([[1, 2], [3, 4], [5, 6]])
    blob = Blob(shape, diffs=diffs)
    self.assertTrue(np.all(blob.vals == np.zeros(shape)))
    self.assertTrue(np.all(blob.diffs == diffs))
def init_vals_test(self):
    shape = (2, 3)
    vals = np.array([[1, 2, 3], [4, 5, 6]])
    blob = Blob(shape, vals=vals)
    self.assertTrue(np.all(blob.vals == vals))
    self.assertTrue(np.all(blob.diffs == np.zeros(shape)))
def forward_test(self):
    dim = 3
    x = np.array([[1, 4], [2, 5], [3, 6]])
    e = np.exp(1)
    d1 = e + e**2 + e**3
    d2 = e**4 + e**5 + e**6
    y = np.array([[e / d1, e**4 / d2],
                  [e**2 / d1, e**5 / d2],
                  [e**3 / d1, e**6 / d2]])
    bottom_blob = Blob((3, 2), vals=x)
    top_blob = Blob((3, 2))
    layer = SoftmaxLayer(dim)
    layer.forward([bottom_blob], [top_blob])
    diff = top_blob.vals - y
    self.assertTrue(np.linalg.norm(diff[:]) < 10e-2)
def forward_backward_test(self):
    """
    We test the forward and backward passes using a simple linear
    regression network. For linear regression we have

        J(W, b) = \sum_i ||W x_i + b - y_i||^2
        dJ/dW = 2 \sum_i (W x_i x_i^T + b x_i^T - y_i x_i^T)
        dJ/db = 2 \sum_i (W x_i + b - y_i)

    With

        W = [1 2 3]    b = (1, 2)
            [4 5 6]

        x1 = (1, 1, 2)    y1 = (1, 0)
        x2 = (2, 5, 2)    y2 = (0, 1)

    plugging and chugging gives the derivatives:

        dJ/dW = [[94, 208, 112], [230, 506, 276]]
        dJ/db = [[56], [138]]
    """
    in_dim = 3
    out_dim = 2
    w = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
    b = np.array([[1, 2]], dtype=np.float32).T
    weights = Blob((out_dim, in_dim), vals=w)
    bias = Blob((out_dim, 1), vals=b)
    affine_layer = FullyConnectedLayer(in_dim, out_dim, name='fc',
                                       input_names=['data'],
                                       output_names=['predictions'],
                                       weights=weights, bias=bias)
    loss_layer = L2LossLayer(out_dim, name='loss',
                             input_names=['predictions', 'targets'],
                             output_names=['loss'])
    net = Net([affine_layer, loss_layer], batch_size=2)

    xs = np.array([[1, 1, 2], [2, 5, 2]], dtype=np.float32).T
    ys = np.array([[1, 0], [0, 1]], dtype=np.float32).T
    net.forward(data=xs, targets=ys)
    net.backward()

    expected_d_weights = np.array([[94, 208, 112], [230, 506, 276]])
    expected_d_bias = np.array([[56, 138]]).T
    self.assertTrue(np.all(weights.diffs == expected_d_weights))
    self.assertTrue(np.all(bias.diffs == expected_d_bias))
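
# Standalone sanity check (added for reference, independent of this library)
# that reproduces the hand-computed derivatives in the docstring above using
# plain NumPy. Samples are stored as columns, matching the test's convention.
#
#   import numpy as np
#
#   W = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
#   b = np.array([[1], [2]], dtype=np.float32)
#   X = np.array([[1, 1, 2], [2, 5, 2]], dtype=np.float32).T  # columns x1, x2
#   Y = np.array([[1, 0], [0, 1]], dtype=np.float32).T        # columns y1, y2
#
#   R = W.dot(X) + b - Y                      # residuals W x_i + b - y_i
#   dW = 2.0 * R.dot(X.T)                     # [[94, 208, 112], [230, 506, 276]]
#   db = 2.0 * R.sum(axis=1, keepdims=True)   # [[56], [138]]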
def helper(shape, name):
    if len(shape) > 0:
        shape = shape + (self.batch_size, )
    if name not in self.blobs:
        self.blobs[name] = Blob(shape, name=name)
    elif name in self.blobs and shape != self.blobs[name].shape:
        raise ValueError(
            'Blob "%s" referenced with inconsistent shapes %s, %s'
            % (name, shape, self.blobs[name].shape))
def simple_forward_test(self):
    in_dim = 2
    out_dim = 3
    weights = np.array([[1, 2], [3, 4], [5, 6]])
    weights_blob = Blob(weights.shape, vals=weights)
    bias = np.array([[1], [2], [3]])
    bias_blob = Blob(bias.shape, vals=bias)
    input_data = np.array([[10], [20]])
    output_data = np.array([[51], [112], [173]])
    input_blob = Blob((in_dim, 1), vals=input_data)
    output_blob = Blob((out_dim, 1))
    fcl = FullyConnectedLayer(in_dim, out_dim,
                              weights=weights_blob, bias=bias_blob)
    fcl.forward([input_blob], [output_blob])
    self.assertTrue((output_blob.vals == output_data).all())
def forward_test(self):
    in_dim = 3
    out_dim = 2
    w = np.array([[1, -1, 0], [0, 1, -1]], dtype=np.float32)
    b = np.array([[2], [3]], dtype=np.float32)
    weights = Blob((out_dim, in_dim), vals=w)
    bias = Blob((out_dim, 1), vals=b)
    layer1 = FullyConnectedLayer(in_dim, out_dim, name='fc',
                                 input_names=['data'],
                                 output_names=['hidden'],
                                 weights=weights, bias=bias)
    layer2 = ReLuLayer((out_dim,), name='relu',
                       input_names=['hidden'],
                       output_names=['output'])
    net = Net([layer1, layer2], batch_size=2)
    inputs = np.array([[1, 2, 3], [2, 5, 2]]).T
    expected_output = np.array([[1, 2], [0, 6]]).T
    outputs = net.forward(data=inputs)
    self.assertEqual(1, len(outputs))
    self.assertTrue('output' in outputs)
    self.assertTrue(np.all(outputs['output'] == expected_output))
def init_bad_vals_test(self):
    shape = (2, 3)
    vals = np.array([[1, 2], [3, 4]])
    with self.assertRaises(ValueError):
        blob = Blob(shape, vals=vals)
def init_bad_diffs_test(self):
    shape = (3, 3)
    diffs = np.array([[1, 2], [3, 4]])
    with self.assertRaises(ValueError):
        blob = Blob(shape, diffs=diffs)
def set_bad_vals_test(self):
    shape = (5, 5)
    blob = Blob(shape)
    with self.assertRaises(ValueError):
        blob.vals = np.array([[1, 2], [4, 5]])
def init_test(self):
    shape = (4, 5)
    blob = Blob(shape)
    self.assertTrue(np.all(blob.vals == np.zeros(shape)))
    self.assertTrue(np.all(blob.diffs == np.zeros(shape)))