Example #1
    def test_transpose(self, dtype, ndims, seed, null_axes, engine, gc, dc):
        if (gc.device_type == caffe2_pb2.CUDA and engine == "CUDNN"):
            # cuDNN releases before v6 do not support int32 here.
            assume(workspace.GetCuDNNVersion() >= 6000 or dtype != np.int32)

        dims = (np.random.rand(ndims) * 16 + 1).astype(np.int32)
        X = (np.random.rand(*dims) * 16).astype(dtype)

        if null_axes:
            axes = None
            op = core.CreateOperator(
                "Transpose",
                ["input"], ["output"],
                engine=engine)
        else:
            np.random.seed(int(seed))
            axes = [int(v) for v in list(np.random.permutation(X.ndim))]
            op = core.CreateOperator(
                "Transpose",
                ["input"], ["output"],
                axes=axes,
                engine=engine)

        def transpose_ref(x, axes):
            return (np.transpose(x, axes),)

        self.assertReferenceChecks(gc, op, [X, axes],
                                   transpose_ref)
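
A quick standalone check of the reference semantics (plain NumPy, independent of Caffe2): when axes is None, np.transpose reverses the dimension order, which is exactly what the null_axes branch above relies on.

import numpy as np

x = np.zeros((2, 3, 4))
# axes=None reverses the dims: (2, 3, 4) -> (4, 3, 2)
assert np.transpose(x, None).shape == (4, 3, 2)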
Example #2
def _cudnn_supports(dilation=False, nhwc=False):
    """Return True if cuDNN supports this configuration."""
    v = workspace.GetCuDNNVersion()
    if dilation and v < 6000:
        # Dilation not supported until v6
        return False
    if dilation and nhwc:
        # Dilation and NHWC not supported together
        return False
    return True
Example #3
def _cudnn_supports(dilation=False, nhwc=False, backward=False):
    """Return True if cuDNN supports this configuration."""
    v = workspace.GetCuDNNVersion()
    if backward:
        if nhwc:
            # nhwc isn't supported in backward ops.
            return False
    else:
        # Forward mode.
        if dilation and v < 6000:
            # Dilation not supported until v6
            return False
        if dilation and nhwc:
            # Dilation and NHWC not supported together
            return False
    return True
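
A sketch of how such a helper is typically consumed in a hypothesis-based test (the surrounding test body and its dilation/order parameters are assumed, not taken from this file):

from hypothesis import assume

# Skip configurations cuDNN cannot run instead of reporting them as failures.
assume(_cudnn_supports(dilation=(dilation > 1),
                       nhwc=(order == "NHWC"),
                       backward=True))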
Example #4
    def test_global_pooling(self, size, input_channels, batch_size,
                            order, op_type, engine, gc, dc):
        # CuDNN 5 does not support deterministic max pooling.
        assume(workspace.GetCuDNNVersion() >= 6000 or op_type != "MaxPool")
        op = core.CreateOperator(
            op_type,
            ["X"],
            ["Y"],
            order=order,
            engine=engine,
            global_pooling=True,
        )
        X = np.random.rand(
            batch_size, size, size, input_channels).astype(np.float32)
        if order == "NCHW":
            X = X.transpose((0, 3, 1, 2))

        self.assertDeviceChecks(dc, op, [X], [0])
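        # Gradient checks are skipped for max pooling: its output is piecewise
        # and ties make the numerical gradient unreliable (presumably the
        # reason for the guard below).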
        if 'MaxPool' not in op_type:
            self.assertGradientChecks(gc, op, [X], 0, [0])
Example #5
    def test_convolution_gradients(self, stride, pad, kernel, dilation, size,
                                   input_channels, output_channels, batch_size,
                                   order, engine, use_bias, gc, dc):
        dkernel = dilation * (kernel - 1) + 1

        # Only cuDNN v6+ supports dilated convolutions; skip dilated CUDNN
        # configurations on older versions.
        if workspace.GetCuDNNVersion() < 6000:
            assume(engine == "" or dilation == 1)
        assume(engine != "MKLDNN" or use_bias is True)

        op = core.CreateOperator(
            "Conv",
            ["X", "w", "b"] if use_bias else ["X", "w"],
            ["Y"],
            stride=stride,
            kernel=kernel,
            dilation=dilation,
            pad=pad,
            order=order,
            engine=engine,
        )
        X = np.random.rand(batch_size, size, size, input_channels).astype(
            np.float32) - 0.5
        w = np.random.rand(
            output_channels, kernel, kernel, input_channels).astype(np.float32)\
            - 0.5
        b = np.random.rand(output_channels).astype(np.float32) - 0.5
        if order == "NCHW":
            X = X.transpose((0, 3, 1, 2))
            w = w.transpose((0, 3, 1, 2))

        inputs = [X, w, b] if use_bias else [X, w]
        # Error handling path.
        # The input is square, so one spatial check covers both dimensions.
        if size + pad + pad < dkernel:
            with self.assertRaises(RuntimeError):
                self.assertDeviceChecks(dc, op, inputs, [0])
            return

        self.assertDeviceChecks(dc, op, inputs, [0])
        for i in range(len(inputs)):
            self.assertGradientChecks(gc, op, inputs, i, [0])
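
The dkernel computed at the top is the standard effective extent of a dilated kernel; a quick numeric check (plain Python, values made up):

# dilation=2, kernel=3 places taps at offsets 0, 2, 4 -> extent 5
dilation, kernel = 2, 3
dkernel = dilation * (kernel - 1) + 1
assert dkernel == 5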
Example #6
    def test_global_max_pool_nchw(self, op_type, sz, batch_size, engine, gc,
                                  dc):
        """Special test to stress the fast path of NCHW max pooling."""
        # CuDNN 5 does not support deterministic max pooling.
        assume(workspace.GetCuDNNVersion() >= 6000 or engine != "CUDNN")
        op = core.CreateOperator(
            op_type,
            ["X"],
            ["Y"],
            stride=1,
            kernel=sz,
            pad=0,
            order="NCHW",
            engine=engine,
            deterministic=1,
        )

        np.random.seed(1234)
        X = np.random.rand(batch_size, 3, sz, sz).astype(np.float32)

        self.assertDeviceChecks(dc, op, [X], [0])
        self.assertGradientChecks(gc, op, [X], 0, [0], stepsize=1e-4)
Example #7
def main():
    args = parse_args()

    if args.dtype == 'float32':
        args.dtype = 'float'

    # report some available info
    if args.device == 'gpu':
        assert args.num_gpus > 0, "Number of GPUs must be specified in GPU mode"
        print("__caffe2.cuda_version__=%s" % (json.dumps(workspace.GetCUDAVersion())))
        print("__caffe2.cudnn_version__=%s" % (json.dumps(workspace.GetCuDNNVersion())))

    try:
        opts = vars(args)
        opts['phase'] = 'inference' if args.forward_only else 'training'
        model_title, times = benchmark(opts)
    except Exception as err:
        # TODO: this is not happening; the program terminates earlier.
        # For now, do not rely on __results.status__=...
        times = np.zeros(0)
        model_title = 'Unk'
        print("Critical error while running benchmarks (%s). See stack trace below." % str(err))
        traceback.print_exc(file=sys.stdout)

    if len(times) > 0:
        mean_time = np.mean(times)  # seconds
        # Compute mean throughput.
        num_local_devices = 1 if args.device == 'cpu' else args.num_gpus  # compute devices per node
        num_devices = num_local_devices * args.num_workers  # global number of devices
        replica_batch = args.batch_size  # input is a per-replica batch
        mean_throughput = num_devices * replica_batch / mean_time  # images / sec
        print("__results.time__=%s" % (json.dumps(1000.0 * mean_time)))
        print("__results.throughput__=%s" % (json.dumps(int(mean_throughput))))
        print("__exp.model_title__=%s" % (json.dumps(model_title)))
        print("__results.time_data__=%s" % (json.dumps((1000.0*times).tolist())))
    else:
        print("__results.status__=%s" % (json.dumps("failure")))
Example #8
    def test_convolution_layout(self, stride, pad, kernel, dilation, size,
                                input_channels, output_channels, batch_size,
                                use_bias, gc, dc):
        assume(size >= dilation * (kernel - 1) + 1)

        X = np.random.rand(batch_size, size, size, input_channels).astype(
            np.float32) - 0.5
        w = np.random.rand(
            output_channels, kernel, kernel, input_channels).astype(np.float32)\
            - 0.5
        b = np.random.rand(output_channels).astype(np.float32) - 0.5
        Output = collections.namedtuple("Output", ["Y", "engine", "order"])
        outputs = []

        cudnn_v6p = workspace.GetCuDNNVersion() >= 6000
        no_dilation = dilation == 1
        # cuDNN supports dilated convolutions only from v6 onward.
        engine_list = ["", "CUDNN"] if cudnn_v6p or no_dilation else [""]

        for order in ["NCHW", "NHWC"]:
            for engine in engine_list:
                op = core.CreateOperator(
                    "Conv",
                    ["X", "w", "b"] if use_bias else ["X", "w"],
                    ["Y"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    order=order,
                    engine=engine,
                    device_option=gc,
                )
                if order == "NCHW":
                    X_f = X.transpose((0, 3, 1, 2))
                    w_f = w.transpose((0, 3, 1, 2))
                else:
                    X_f = X
                    w_f = w
                self.assertDeviceChecks(
                    dc, op, [X_f, w_f, b] if use_bias else [X_f, w_f], [0])
                self.ws.create_blob("X").feed(X_f, device_option=gc)
                self.ws.create_blob("w").feed(w_f, device_option=gc)
                self.ws.create_blob("b").feed(b, device_option=gc)
                self.ws.run(op)
                outputs.append(
                    Output(Y=self.ws.blobs["Y"].fetch(),
                           engine=engine,
                           order=order))

        def canonical(o):
            if o.order == "NHWC":
                return o.Y.transpose((0, 3, 1, 2))
            else:
                return o.Y

        for o in outputs:
            np.testing.assert_allclose(canonical(outputs[0]),
                                       canonical(o),
                                       atol=1e-4,
                                       rtol=1e-4)
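
The canonical() helper works because an NHWC tensor transposed with (0, 3, 1, 2) lines up axis-for-axis with its NCHW counterpart; a minimal shape check (plain NumPy):

import numpy as np

y_nhwc = np.zeros((2, 5, 5, 3))  # N, H, W, C
assert y_nhwc.transpose((0, 3, 1, 2)).shape == (2, 3, 5, 5)  # N, C, H, W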
Example #9
def get_nvidia_info():
    return (
        get_nvidia_smi_output(),
        workspace.GetCUDAVersion(),
        workspace.GetCuDNNVersion(),
    )
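
A sketch of how the tuple might be consumed (the field order follows the function body above; the call site itself is assumed):

smi_output, cuda_version, cudnn_version = get_nvidia_info()
print("CUDA %s, cuDNN %s" % (cuda_version, cudnn_version))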
Example #10
        action='store_true',
        help=
        'Enable volta\'s tensor ops (requires CUDA >= 9, cuDNN >= 7 and NVIDIA Volta GPU)'
    )
    args = parser.parse_args()

    if args.dtype == 'float32':
        args.dtype = 'float'

    # report some available info
    if args.device == 'gpu':
        assert args.num_gpus > 0, "Number of GPUs must be specified in GPU mode"
        print("__caffe2.cuda_version__=%s" %
              (json.dumps(workspace.GetCUDAVersion())))
        print("__caffe2.cudnn_version__=%s" %
              (json.dumps(workspace.GetCuDNNVersion())))

    try:
        opts = vars(args)
        opts['phase'] = 'inference' if args.forward_only else 'training'
        model_title, times = benchmark(opts)
    except Exception as err:
        # TODO: this is not happening; the program terminates earlier.
        # For now, do not rely on __results.status__=...
        times = np.zeros(0)
        model_title = 'Unk'
        print(
            "Critical error while running benchmarks (%s). See stack trace below."
            % str(err))
        traceback.print_exc(file=sys.stdout)
Example #11
    parser.add_argument('--num_decode_threads', type=int, required=False, default=1, help='Number of image decode threads. For high throughput models such as AlexNetOWT set to 6-8 for 4 Voltas.')
    parser.add_argument('--float16_compute', nargs='?', const=True, default=False, type=str2bool, help='If true, use FP16 SGD optimizer else use multi-precision SGD optimizer')
    # These parameters affect the ModelHelper behaviour and are now applied for GPU benchmarks
    parser.add_argument('--cudnn_workspace_limit_mb', type=int, required=False, default=64, help='CuDNN workspace limit in MBs')
    parser.add_argument('--use_cudnn', nargs='?', const=True, default=True, type=str2bool, help='Use NVIDIA cuDNN library.')
    parser.add_argument('--cudnn_exhaustive_search', nargs='?', const=True, default=True, type=str2bool, help='Enable cuDNN exhaustive search for convolution algorithms.')
    args = parser.parse_args()

    if args.dtype == 'float32':
        args.dtype = 'float'

    # report some available info
    if args.device == 'gpu':
        assert args.num_gpus > 0, "Number of GPUs must be specified in GPU mode"
        print("__caffe2.cuda_version__=%s" % (json.dumps(workspace.GetCUDAVersion())))
        print("__caffe2.cudnn_version__=%s" % (json.dumps(workspace.GetCuDNNVersion())))

    try:
        opts = vars(args)
        opts['phase'] = 'inference' if args.forward_only else 'training'
        model_title, times = benchmark(opts)
    except Exception as err:
        # TODO: this is not happening; the program terminates earlier.
        # For now, do not rely on __results.status__=...
        times = np.zeros(0)
        model_title = 'Unk'
        print("Critical error while running benchmarks (%s). See stack trace below." % str(err))
        traceback.print_exc(file=sys.stdout)

    if len(times) > 0:
        mean_time = np.mean(times)  # seconds