def TranslateConvWithGroups(layer, pretrained_blobs):
    print("Legacy warning: convolution with groups seems to be less and less "
          "popular, so we no longer have it as a first-class citizen op. "
          "Instead, we will simulate it with a depth split followed by conv "
          "followed by depth concat.")
    caffe_ops = []
    caffe_params = []
    param = layer.convolution_param
    weight, bias = pretrained_blobs
    bias = bias.flatten()
    n, c, h, w = weight.shape
    g = param.group   # number of groups
    od = int(n / g)   # output channels per group
    if od * g != n:
        # This should not happen: n should always be divisible by g.
        raise ValueError(
            "Output dimension %d is not divisible by group %d." % (n, g))
    output = layer.top[0]
    # First, depth-split the input into g slices of c channels each.
    depth_split_op = core.CreateOperator("DepthSplit")(
        layer.bottom[0],
        ['_' + output + '_gconv_split_' + str(i) for i in range(g)],
        dimensions=[c for i in range(g)],
        order="NCHW")
    caffe_ops.append(depth_split_op)
    # Second, run one convolution per group on its slice.
    for i in range(g):
        this_weight = utils.NumpyArrayToCaffe2Tensor(
            weight[i * od:(i + 1) * od], output + '_gconv_' + str(i) + '_w')
        this_bias = utils.NumpyArrayToCaffe2Tensor(
            bias[i * od:(i + 1) * od], output + '_gconv_' + str(i) + '_b')
        conv_op = core.CreateOperator("Conv")(
            [depth_split_op.output[i], this_weight.name, this_bias.name],
            ['_' + output + '_gconv_conv_' + str(i)],
            stride=param.stride,
            kernel=param.kernel_size,
            pad=param.pad,
            order="NCHW")
        caffe_ops.append(conv_op)
        caffe_params.extend([this_weight, this_bias])
    # Third, depth-concat the per-group outputs back into one blob.
    depth_concat_op = core.CreateOperator("DepthConcat")(
        ['_' + output + '_gconv_conv_' + str(i) for i in range(g)],
        [output, '_' + output + '_gconv_concat_dims'],
        order="NCHW")
    caffe_ops.append(depth_concat_op)
    return caffe_ops, caffe_params
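# A minimal NumPy sketch (hypothetical helper, not used by the translator)
# showing why the split/conv/concat simulation above matches a grouped
# convolution. We use a 1x1 convolution, which reduces to a per-pixel matrix
# multiply; the channel bookkeeping is the same for larger kernels.
def _grouped_conv_1x1_sketch(X, weight, g):
    """X: (N, C, H, W); weight: (n, C // g, 1, 1) with n divisible by g."""
    n, c = weight.shape[0], weight.shape[1]
    od = n // g  # output channels per group, as in the translator above
    outputs = []
    for i in range(g):
        Xi = X[:, i * c:(i + 1) * c]               # depth split
        Wi = weight[i * od:(i + 1) * od, :, 0, 0]  # (od, c) weight slice
        Yi = np.einsum('oc,nchw->nohw', Wi, Xi)    # per-group 1x1 conv
        outputs.append(Yi)
    return np.concatenate(outputs, axis=1)         # depth concat
# Usage sketch: with g=2, X of shape (2, 6, 4, 4) and weight of shape
# (8, 3, 1, 1), the result has shape (2, 8, 4, 4), i.e. 4 output channels
# per group concatenated along the depth axis.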
def testSigmoid(self):
    for input_size in self.test_configs:
        op = core.CreateOperator("Sigmoid")(["X"], ["Y"])
        # Center the inputs around zero.
        X = np.random.rand(*input_size).astype(np.float32) - 0.5
        res = device_checker.CheckSimple(op, [X], [0])
        self.assertTrue(res)
        for checker in gradient_checkers:
            res, grad, grad_estimated = checker.CheckSimple(
                op, [X], 0, [0])
            self.assertTrue(res)
def testRelu(self):
    for input_size in self.test_configs:
        op = core.CreateOperator("Relu")(["X"], ["Y"])
        X = np.random.rand(*input_size).astype(np.float32)
        # Move the inputs away from the origin so the numerical gradient
        # check does not straddle the kink at zero.
        X += 0.01 * np.sign(X)
        X[X == 0] = 0.01
        res = device_checker.CheckSimple(op, [X], [0])
        self.assertTrue(res)
        for checker in gradient_checkers:
            res, grad, grad_estimated = checker.CheckSimple(
                op, [X], 0, [0])
            self.assertTrue(res)
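# A standalone sketch (hypothetical, illustration only) of why the test above
# nudges inputs away from zero: a central-difference estimate of the ReLU
# derivative straddles the kink when |x| < step, returning ~0.5 where the
# analytic gradient is 0 or 1, which would make the check fail spuriously.
def _relu_numeric_grad_sketch(x, step=0.05):
    relu = lambda v: max(v, 0.0)
    return (relu(x + step) - relu(x - step)) / (2 * step)
# _relu_numeric_grad_sketch(0.0) -> 0.5, disagrees with the analytic gradient.
# _relu_numeric_grad_sketch(0.2) -> 1.0, matches it once x is clear of zero.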
def testLRN(self):
    for input_size, depth in self.test_configs:
        op = core.CreateOperator("LRN")(["X"], ["Y", "Y_scale"],
                                        size=11, alpha=0.001, beta=0.5,
                                        bias=2.0, order="NHWC")
        X = np.random.rand(2, input_size, input_size,
                           depth).astype(np.float32)
        res = device_checker.CheckSimple(op, [X], [0])
        self.assertTrue(res)
        for checker in gradient_checkers:
            res, grad, grad_estimated = checker.CheckSimple(
                op, [X], 0, [0])
            self.assertTrue(res)
def testAveragePoolingLegacyPadding(self):
    for stride, kernel, legacy_pad, size, order in self.test_configs:
        print("AveragePool", stride, kernel, legacy_pad, size, order)
        op = core.CreateOperator("AveragePool")(["X"], ["Y"],
                                                stride=stride,
                                                kernel=kernel,
                                                legacy_pad=legacy_pad,
                                                order=order)
        if order == "NHWC":
            X = np.random.rand(2, size, size, 3).astype(np.float32)
        else:
            X = np.random.rand(2, 3, size, size).astype(np.float32)
        res = device_checker.CheckSimple(op, [X], [0])
        self.assertTrue(res)
        for checker in gradient_checkers:
            res, grad, grad_estimated = checker.CheckSimple(
                op, [X], 0, [0])
            self.assertTrue(res)
def testMaxPoolingLegacyPadding(self):
    for stride, kernel, legacy_pad, size, order in self.test_configs:
        print("MaxPool", stride, kernel, legacy_pad, size, order)
        op = core.CreateOperator("MaxPool")(["X"], ["Y", "Y_maxid"],
                                            stride=stride,
                                            kernel=kernel,
                                            legacy_pad=legacy_pad,
                                            order=order)
        # Use a random permutation so that all values are at least 0.01
        # apart; ties in the max would make the gradient ambiguous and the
        # numerical check unreliable.
        if order == "NHWC":
            X = np.random.permutation(1 * size * size * 3).reshape(
                1, size, size, 3).astype(np.float32) * 0.01
        else:
            X = np.random.permutation(1 * size * size * 3).reshape(
                1, 3, size, size).astype(np.float32) * 0.01
        res = device_checker.CheckSimple(op, [X], [0])
        self.assertTrue(res)
        for checker in gradient_checkers:
            res, grad, grad_estimated = checker.CheckSimple(
                op, [X], 0, [0])
            self.assertTrue(res)
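# A tiny sketch (hypothetical, illustration only) of why the max-pooling test
# above keeps values at least 0.01 apart: when two entries in a pooling
# window tie for the max, the subgradient can be attributed to either entry,
# so the operator's analytic gradient and a numerical estimate may
# legitimately disagree. Distinct values make the argmax, and hence the
# gradient, unique.
def _max_numeric_grad_sketch(values, idx, step=0.001):
    up, down = list(values), list(values)
    up[idx] += step
    down[idx] -= step
    return (max(up) - max(down)) / (2 * step)
# With a tie:       _max_numeric_grad_sketch([1.0, 1.0], 0) -> 0.5
# Distinct values:  _max_numeric_grad_sketch([1.0, 0.5], 0) -> 1.0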
def testDepthConcatNCHW(self):
    for input_size, d1, d2, d3, d4 in self.test_configs:
        op = core.CreateOperator("DepthConcat")(["X1", "X2", "X3", "X4"],
                                                ["Y", "Y_dims"],
                                                order="NCHW")
        Xs = [
            np.random.rand(2, d1, input_size, input_size).astype(np.float32),
            np.random.rand(2, d2, input_size, input_size).astype(np.float32),
            np.random.rand(2, d3, input_size, input_size).astype(np.float32),
            np.random.rand(2, d4, input_size, input_size).astype(np.float32)
        ]
        res = device_checker.CheckSimple(op, Xs, [0])
        self.assertTrue(res)
        # Check the gradient with respect to each of the four inputs.
        for i in range(4):
            for checker in gradient_checkers:
                res, grad, grad_estimated = checker.CheckSimple(
                    op, Xs, i, [0])
                self.assertTrue(res)
def testConvolutionLegacyPadding(self):
    for stride, kernel, legacy_pad, size, order in self.test_configs:
        print("conv", stride, kernel, legacy_pad, size, order)
        op = core.CreateOperator("Conv")(["X", "w", "b"], ["Y"],
                                         stride=stride,
                                         kernel=kernel,
                                         legacy_pad=legacy_pad,
                                         order=order)
        if order == "NHWC":
            X = np.random.rand(2, size, size, 3).astype(np.float32) - 0.5
            w = np.random.rand(4, kernel, kernel, 3).astype(
                np.float32) - 0.5
        else:
            X = np.random.rand(2, 3, size, size).astype(np.float32) - 0.5
            w = np.random.rand(4, 3, kernel, kernel).astype(
                np.float32) - 0.5
        b = np.random.rand(4).astype(np.float32) - 0.5
        res = device_checker.CheckSimple(op, [X, w, b], [0])
        self.assertTrue(res)
        # Check the gradient with respect to X, w, and b in turn.
        for checker in gradient_checkers:
            for i in range(3):
                res, grad, grad_estimated = checker.CheckSimple(
                    op, [X, w, b], i, [0])
                self.assertTrue(res)