Ejemplo n.º 1
0
def bench_v1(n: int):
    """Benchmark an ``n`` x ``n`` matrix multiplication with the TF v1 session API.

    Two all-ones square variables are multiplied on the device named by
    ``_ARGS_DEVICE``. After one warm-up run, ``_ARGS_REPS`` repetitions are
    timed; each repetition executes the matmul op ``_ARGS_MINI_BATCH`` times.

    Args:
        n: Number of rows and columns of both operands.

    Returns:
        A ``(rate, elapsed_ms)`` tuple where ``elapsed_ms`` is the median
        wall-clock time of one repetition in milliseconds and ``rate`` is the
        corresponding throughput in GFLOPS.
    """
    times = []
    tf.reset_default_graph()
    with tf.device("/%s:0" % (_ARGS_DEVICE)):
        matrix1 = tf.Variable(tf.ones((n, n), dtype=_ARGS_DTYPE))
        matrix2 = tf.Variable(tf.ones((n, n), dtype=_ARGS_DTYPE))
        product = tf.matmul(matrix1, matrix2)

    config = tf.ConfigProto()
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        # warmup
        sess.run(product.op)

        for _ in range(_ARGS_REPS):
            start = time.monotonic()
            for _ in range(_ARGS_MINI_BATCH):
                sess.run(product.op)
            times.append(time.monotonic() - start)

    times_ms = 1000 * np.array(times)  # in seconds, convert to ms
    elapsed_ms = np.median(times_ms)

    # One matmul costs n^3 multiplications plus n^2*(n-1) additions. Every
    # timed repetition runs _ARGS_MINI_BATCH matmuls, so the whole per-matmul
    # cost (not only the additions) has to be scaled by the mini-batch size.
    ops_per_matmul = n**3 + (n - 1) * n**2
    ops = ops_per_matmul * _ARGS_MINI_BATCH
    rate = ops / elapsed_ms / 10**6  # in GFLOPS. (/ milli / 10**6) == (/ 10 ** 9)
    print('%d x %d matmul took:   \t%.4f ms,\t %.2f GFLOPS' % (
        n,
        n,
        elapsed_ms,
        rate,
    ),
          file=sys.stderr)
    return rate, elapsed_ms
Ejemplo n.º 2
0
def bench(n):
    """Benchmark an ``n`` x ``n`` matrix multiplication with ``torch.mm``.

    Two all-ones square matrices are created on the device selected by
    ``_ARGS_DEVICE`` and multiplied ``_ARGS_REPS`` times; each multiplication
    is timed individually.

    Args:
        n: Number of rows and columns of both operands.

    Returns:
        A ``(rate, elapsed_ms)`` tuple where ``elapsed_ms`` is the median
        wall-clock time of one multiplication in milliseconds and ``rate`` is
        the corresponding throughput in GFLOPS.

    Raises:
        Exception: If ``_ARGS_DEVICE`` is ``'gpu'`` but CUDA is not available.
    """
    if _ARGS_DEVICE == 'gpu':
        if not torch.cuda.is_available():
            raise Exception("No GPU available")
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    dtype = _ARGS_DTYPE_MAP[_ARGS_DTYPE]["dtype"]
    matrix1 = torch.ones((n, n), dtype=dtype, device=device)
    matrix2 = torch.ones((n, n), dtype=dtype, device=device)

    # CUDA kernels are launched asynchronously: without an explicit
    # synchronization the timer would stop as soon as the kernel is queued,
    # not when the multiplication has actually finished.
    use_cuda = device.type == 'cuda'
    if use_cuda:
        # Make sure operand creation is not accounted to the first repetition.
        torch.cuda.synchronize()

    times = []

    for _ in range(_ARGS_REPS):
        start = time.monotonic()
        torch.mm(matrix1, matrix2)
        if use_cuda:
            torch.cuda.synchronize()
        times.append(time.monotonic() - start)

    times_ms = 1000 * np.array(times)  # in seconds, convert to ms
    elapsed_ms = np.median(times_ms)

    ops = n ** 3 + (n - 1) * n ** 2  # n^2*(n-1) additions, n^3 multiplications
    rate = ops / elapsed_ms / 10 ** 6  # in GFLOPS. (/ milli / 10**6) == (/ 10 ** 9)
    print('%d x %d matmul took:   \t%.4f ms,\t %.2f GFLOPS' % (n, n, elapsed_ms, rate,), file=sys.stderr)
    return rate, elapsed_ms
Ejemplo n.º 3
0
def bench_v2(n: int):
    """Benchmark an ``n`` x ``n`` matrix multiplication with the TF v2 API.

    Two all-ones square variables are created on the device named by
    ``_ARGS_DEVICE``. ``_ARGS_REPS`` repetitions are timed; each repetition
    calls ``tf.matmul`` ``_ARGS_MINI_BATCH`` times.

    Args:
        n: Number of rows and columns of both operands.

    Returns:
        A ``(rate, elapsed_ms)`` tuple where ``elapsed_ms`` is the median
        wall-clock time of one repetition in milliseconds and ``rate`` is the
        corresponding throughput in GFLOPS.
    """
    times = []
    with tf.device("/%s:0" % (_ARGS_DEVICE)):
        matrix1 = tf.Variable(tf.ones((n, n), dtype=_ARGS_DTYPE))
        matrix2 = tf.Variable(tf.ones((n, n), dtype=_ARGS_DTYPE))

        # NOTE(review): the product is never read back to the host; whether
        # this captures the full device execution time on GPU should be
        # confirmed.
        for _ in range(_ARGS_REPS):
            start = time.monotonic()
            for _ in range(_ARGS_MINI_BATCH):
                tf.matmul(matrix1, matrix2)
            times.append(time.monotonic() - start)

    times_ms = 1000 * np.array(times)  # in seconds, convert to ms
    elapsed_ms = np.median(times_ms)

    # One matmul costs n^3 multiplications plus n^2*(n-1) additions. Every
    # timed repetition runs _ARGS_MINI_BATCH matmuls, so the whole per-matmul
    # cost (not only the additions) has to be scaled by the mini-batch size.
    ops_per_matmul = n**3 + (n - 1) * n**2
    ops = ops_per_matmul * _ARGS_MINI_BATCH
    rate = ops / elapsed_ms / 10**6  # in GFLOPS. (/ milli / 10**6) == (/ 10 ** 9)
    print('%d x %d matmul took:   \t%.4f ms,\t %.2f GFLOPS' % (
        n,
        n,
        elapsed_ms,
        rate,
    ),
          file=sys.stderr)
    return rate, elapsed_ms
Ejemplo n.º 4
0
def bench(
    batch: int,
    tensor_input_height: int,
    tensor_input_width: int,
    tensor_input_channels: int,
    tensor_output_channels: int,
    filter_height: int,
    filter_width: int,
):
    """Benchmark a 2D convolution with ``torch.nn.Conv2d``.

    An all-ones input of shape ``(batch, channels, height, width)`` is created
    on the device selected by ``_ARGS_DEVICE`` and passed through the
    convolution ``_ARGS_REPS`` times; each forward pass is timed individually.

    Args:
        batch: Number of samples in the input tensor.
        tensor_input_height: Height of the input tensor.
        tensor_input_width: Width of the input tensor.
        tensor_input_channels: Number of input channels.
        tensor_output_channels: Number of output channels of the convolution.
        filter_height: Height of the convolution kernel.
        filter_width: Width of the convolution kernel.

    Returns:
        A ``(rate, elapsed_ms)`` tuple where ``elapsed_ms`` is the median
        wall-clock time of one forward pass in milliseconds and ``rate`` is
        the corresponding throughput in GFLOPS.

    Raises:
        Exception: If ``_ARGS_DEVICE`` is ``"gpu"`` but CUDA is not available.
    """
    if _ARGS_DEVICE == "gpu":
        if not torch.cuda.is_available():
            raise Exception("No GPU available")
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    dtype = _ARGS_DTYPE_MAP[_ARGS_DTYPE]["dtype"]

    input_tensor = torch.ones(
        (batch, tensor_input_channels, tensor_input_height,
         tensor_input_width),
        dtype=dtype,
        device=device,
    )

    # The module's parameters are created on the CPU with the default dtype;
    # they must be moved to match the input, otherwise the forward pass fails
    # on GPU or with any dtype other than the default one.
    convolution = torch.nn.Conv2d(
        in_channels=tensor_input_channels,
        out_channels=tensor_output_channels,
        kernel_size=(filter_height, filter_width),
        stride=(_ARGS_STRIDES, _ARGS_STRIDES),
        padding=(_ARGS_PADDING, _ARGS_PADDING),
    ).to(device=device, dtype=dtype)

    # CUDA kernels are launched asynchronously: without an explicit
    # synchronization the timer would stop as soon as the kernel is queued,
    # not when the convolution has actually finished.
    use_cuda = device.type == "cuda"
    if use_cuda:
        # Make sure the setup work is not accounted to the first repetition.
        torch.cuda.synchronize()

    times = []

    for _ in range(_ARGS_REPS):
        start = time.monotonic()
        convolution(input_tensor)
        if use_cuda:
            torch.cuda.synchronize()
        times.append(time.monotonic() - start)

    times_ms = 1000 * np.array(times)  # in seconds, convert to ms
    elapsed_ms = np.median(times_ms)
    # Source:
    # https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/python/profiler/internal/flops_registry.py#L381
    # Formula:
    #  batch_size * image_x_dim * image_y_dim * kernel_x_dim * kernel_y_dim
    #  * input_depth * output_depth * 2 / (image_x_stride * image_x_stride)
    ops = (batch * tensor_input_height * tensor_input_width * filter_height *
           filter_width * tensor_input_channels * tensor_output_channels *
           2) / (_ARGS_STRIDES * _ARGS_STRIDES)
    rate = ops / elapsed_ms / 10**6  # in GFLOPS. (/ milli / 10**6) == (/ 10 ** 9)
    print("conv took:   \t%.4f ms,\t %.2f GFLOPS" % (elapsed_ms, rate),
          file=sys.stderr)

    return rate, elapsed_ms
Ejemplo n.º 5
0
def bench(
    batch: int,
    tensor_input_width: int,
    tensor_input_channels: int,
    filter_width: int,
    filter_input_channels: int,
    filter_output_channels: int,
):
    """Benchmark a 1D convolution with ``torch.nn.Conv1d``.

    An all-ones input of shape ``(batch, channels, width)`` is created on the
    device selected by ``_ARGS_DEVICE`` and passed through the convolution
    ``_ARGS_REPS`` times; each forward pass is timed individually.

    Args:
        batch: Number of samples in the input tensor.
        tensor_input_width: Width (length) of the input tensor.
        tensor_input_channels: Number of input channels.
        filter_width: Width of the convolution kernel.
        filter_input_channels: Accepted for interface compatibility; not used
            by this function.
        filter_output_channels: Number of output channels of the convolution.

    Returns:
        A ``(rate, elapsed_ms)`` tuple where ``elapsed_ms`` is the median
        wall-clock time of one forward pass in milliseconds and ``rate`` is
        the corresponding throughput in GFLOPS.

    Raises:
        Exception: If ``_ARGS_DEVICE`` is ``'gpu'`` but CUDA is not available.
    """
    if _ARGS_DEVICE == 'gpu':
        if not torch.cuda.is_available():
            raise Exception("No GPU available")
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    dtype = _ARGS_DTYPE_MAP[_ARGS_DTYPE]["dtype"]

    input_tensor = torch.ones(
        (batch, tensor_input_channels, tensor_input_width),
        dtype=dtype,
        device=device
    )

    # The module's parameters are created on the CPU with the default dtype;
    # they must be moved to match the input, otherwise the forward pass fails
    # on GPU or with any dtype other than the default one.
    convolution = torch.nn.Conv1d(
        in_channels=tensor_input_channels,
        out_channels=filter_output_channels,
        kernel_size=(filter_width),
        stride=_ARGS_STRIDE,
        padding=_ARGS_PADDING).to(device=device, dtype=dtype)

    # CUDA kernels are launched asynchronously: without an explicit
    # synchronization the timer would stop as soon as the kernel is queued,
    # not when the convolution has actually finished.
    use_cuda = device.type == 'cuda'
    if use_cuda:
        # Make sure the setup work is not accounted to the first repetition.
        torch.cuda.synchronize()

    times = []

    for _ in range(_ARGS_REPS):
        start = time.monotonic()
        convolution(input_tensor)
        if use_cuda:
            torch.cuda.synchronize()
        times.append(time.monotonic() - start)

    times_ms = 1000 * np.array(times)  # in seconds, convert to ms
    elapsed_ms = np.median(times_ms)
    # Formula:
    #  batch_size * x_dim * kernel_x_dim
    #  * input_depth * output_depth * 2 / (x_stride)
    ops = (
        batch
        * tensor_input_width
        * filter_width
        * tensor_input_channels
        * filter_output_channels
        * 2
    ) / (_ARGS_STRIDE)
    rate = ops / elapsed_ms / 10 ** 6  # in GFLOPS. (/ milli / 10**6) == (/ 10 ** 9)
    print('conv took:   \t%.4f ms,\t %.2f GFLOPS' % (elapsed_ms, rate), file=sys.stderr)

    return rate, elapsed_ms
Ejemplo n.º 6
0
def bench_v2():
    """Time printing a greeting ``_ARGS_REPS`` times through the TF v2 API.

    Each round builds a string constant, prints it to stderr with
    ``tf.print`` and drops the reference to the constant again.

    Returns:
        Wall-clock duration of all rounds together, in seconds.
    """
    began_at = time.monotonic()

    for _ in range(_ARGS_REPS):
        greeting = tf.constant("Hello, TensorFlow by Thoth!")
        tf.print(greeting, output_stream=sys.stderr)
        del greeting

    elapsed = time.monotonic() - began_at
    return elapsed
Ejemplo n.º 7
0
def bench_v1():
    """Time printing a greeting ``_ARGS_REPS`` times through the TF v1 API.

    Each round builds a string constant, opens a fresh session, evaluates the
    constant, prints the result to stderr and drops both references again.

    Returns:
        Wall-clock duration of all rounds together, in seconds.
    """
    began_at = time.monotonic()
    for _ in range(_ARGS_REPS):
        greeting = tf.constant("Hello, TensorFlow by Thoth!")
        session = tf.Session()
        evaluated = session.run(greeting)
        print(evaluated, file=sys.stderr)
        del evaluated
        del greeting
        del session

    elapsed = time.monotonic() - began_at
    return elapsed
Ejemplo n.º 8
0
def bench_v1(
    batch: int,
    tensor_input_height: int,
    tensor_input_width: int,
    tensor_input_channels: int,
    filter_height: int,
    filter_width: int,
    filter_input_channels: int,
    filter_output_channels: int,
):
    """Benchmark a 2D convolution with the TF v1 session API.

    The graph (input tensor, all-ones filter variable and ``tf.nn.conv2d``)
    is built on the device named by ``_ARGS_DEVICE``. After one warm-up run
    the convolution op is executed and timed ``_ARGS_REPS`` times.

    Returns:
        A ``(rate, elapsed_ms)`` tuple where ``elapsed_ms`` is the median
        wall-clock time of one run in milliseconds and ``rate`` is the
        corresponding throughput in GFLOPS.
    """
    device_name = "/%s:0" % (_ARGS_DEVICE)
    with tf.device(device_name):
        init_tensor, stride = create_initial_tensor(
            batch=batch,
            tensor_input_height=tensor_input_height,
            tensor_input_width=tensor_input_width,
            tensor_input_channels=tensor_input_channels,
        )

        filter_shape = [
            filter_height,
            filter_width,
            filter_input_channels,
            filter_output_channels,
        ]
        init_filter = tf.Variable(tf.ones(filter_shape), dtype=_ARGS_DTYPE)
        convolution = tf.nn.conv2d(
            init_tensor,
            filter=init_filter,
            strides=stride,
            padding=_ARGS_PADDING,
            data_format=_ARGS_DATA_FORMAT,
        )

    durations = []
    with tf.Session(config=tf.ConfigProto()) as sess:
        sess.run(tf.global_variables_initializer())
        # One untimed run before the measurements start.
        sess.run(convolution.op)

        for _ in range(_ARGS_REPS):
            tick = time.monotonic()
            sess.run(convolution.op)
            durations.append(time.monotonic() - tick)

    # Durations are collected in seconds; report the median in milliseconds.
    elapsed_ms = np.median(1000 * np.array(durations))

    # Source: https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/python/profiler/internal/flops_registry.py#L381
    # Formula:
    #  batch_size * image_x_dim * image_y_dim * kernel_x_dim * kernel_y_dim
    #  * input_depth * output_depth * 2 / (image_x_stride * image_x_stride)
    numerator = (batch * tensor_input_height * tensor_input_width *
                 filter_height * filter_width * tensor_input_channels *
                 filter_output_channels * 2)
    denominator = _ARGS_STRIDES * _ARGS_STRIDES
    ops = numerator / denominator

    # ops per millisecond divided by 10**6 gives GFLOPS.
    rate = ops / elapsed_ms / 10**6
    message = 'conv took:   \t%.4f ms,\t %.2f GFLOPS' % (elapsed_ms, rate)
    print(message, file=sys.stderr)

    return rate, elapsed_ms
Ejemplo n.º 9
0
def bench_v1(
    batch: int,
    tensor_input_width: int,
    tensor_input_channels: int,
    filter_width: int,
    filter_input_channels: int,
    filter_output_channels: int,
):
    """Benchmark a 1D convolution with the TF v1 session API.

    The default graph is reset, then the input tensor, an all-ones filter
    variable and ``tf.nn.conv1d`` are built on the device named by
    ``_ARGS_DEVICE``. After one warm-up run, ``_ARGS_REPS`` repetitions are
    timed; each repetition executes the op ``_ARGS_MINI_BATCH`` times.

    Returns:
        A ``(rate, elapsed_ms)`` tuple where ``elapsed_ms`` is the median
        wall-clock time of one repetition in milliseconds and ``rate`` is the
        corresponding throughput in GFLOPS.
    """
    tf.reset_default_graph()
    device_name = "/%s:0" % (_ARGS_DEVICE)
    with tf.device(device_name):
        init_tensor = create_initial_tensor(
            batch=batch,
            tensor_input_width=tensor_input_width,
            tensor_input_channels=tensor_input_channels,
        )
        filter_shape = [
            filter_width,
            filter_input_channels,
            filter_output_channels,
        ]
        init_filter = tf.Variable(tf.ones(filter_shape), dtype=_ARGS_DTYPE)
        convolution = tf.nn.conv1d(
            init_tensor,
            filters=init_filter,
            stride=_ARGS_STRIDE,
            padding=_ARGS_PADDING,
            data_format=_ARGS_DATA_FORMAT,
        )

    durations = []
    with tf.Session(config=tf.ConfigProto()) as sess:
        sess.run(tf.global_variables_initializer())
        # One untimed run before the measurements start.
        sess.run(convolution.op)

        for _ in range(_ARGS_REPS):
            tick = time.monotonic()
            for _ in range(_ARGS_MINI_BATCH):
                sess.run(convolution.op)
            durations.append(time.monotonic() - tick)

    # Durations are collected in seconds; report the median in milliseconds.
    elapsed_ms = np.median(1000 * np.array(durations))

    # Formula:
    #  batch_size * x_dim * kernel_x_dim
    #  * input_depth * output_depth * 2 / (x_stride)
    # scaled by the number of runs inside one timed repetition.
    numerator = (batch * tensor_input_width * filter_width *
                 tensor_input_channels * filter_output_channels *
                 _ARGS_MINI_BATCH * 2)
    ops = numerator / _ARGS_STRIDE

    # ops per millisecond divided by 10**6 gives GFLOPS.
    rate = ops / elapsed_ms / 10**6
    message = 'conv took:   \t%.4f ms,\t %.2f GFLOPS' % (elapsed_ms, rate)
    print(message, file=sys.stderr)

    return rate, elapsed_ms
Ejemplo n.º 10
0
def bench_v2(batch: int, tensor_input_width: int, tensor_input_channels: int,
             filter_width: int, filter_input_channels: int,
             filter_output_channels: int):
    """Benchmark a 1D convolution with the TF v2 API.

    The input tensor and an all-ones filter variable are created on the
    device named by ``_ARGS_DEVICE``; ``tf.nn.conv1d`` is then called and
    timed ``_ARGS_REPS`` times under the same device scope.

    Args:
        batch: Batch size forwarded to ``create_initial_tensor``.
        tensor_input_width: Input width forwarded to
            ``create_initial_tensor``.
        tensor_input_channels: Input channel count forwarded to
            ``create_initial_tensor``.
        filter_width: Width of the convolution filter.
        filter_input_channels: Number of input channels of the filter.
        filter_output_channels: Number of output channels of the filter.

    Returns:
        A ``(rate, elapsed_ms)`` tuple where ``elapsed_ms`` is the median
        wall-clock time of one call in milliseconds and ``rate`` is the
        corresponding throughput in GFLOPS.
    """
    times = []
    with tf.device("/%s:0" % (_ARGS_DEVICE)):
        init_tensor = create_initial_tensor(
            batch=batch,
            tensor_input_width=tensor_input_width,
            tensor_input_channels=tensor_input_channels)
        init_filter = tf.Variable(
            tf.ones([
                filter_width,
                filter_input_channels,
                filter_output_channels,
            ]),
            dtype=_ARGS_DTYPE,
        )

        # Keep the timed calls inside the device scope: outside of it the
        # runtime chooses the device for the convolution on its own, which is
        # not necessarily the one requested through _ARGS_DEVICE.
        for _ in range(_ARGS_REPS):
            start = time.monotonic()
            tf.nn.conv1d(
                init_tensor,
                filters=init_filter,
                stride=_ARGS_STRIDE,
                padding=_ARGS_PADDING,
                data_format=_ARGS_DATA_FORMAT,
            )
            times.append(time.monotonic() - start)

    times_ms = 1000 * np.array(times)  # in seconds, convert to ms
    elapsed_ms = np.median(times_ms)
    # Formula:
    #  batch_size * x_dim * kernel_x_dim
    #  * input_depth * output_depth * 2 / (x_stride)
    ops = (batch * tensor_input_width * filter_width * tensor_input_channels *
           filter_output_channels * 2) / (_ARGS_STRIDE)
    rate = ops / elapsed_ms / 10**6  # in GFLOPS. (/ milli / 10**6) == (/ 10 ** 9)
    print('conv took:   \t%.4f ms,\t %.2f GFLOPS' % (elapsed_ms, rate),
          file=sys.stderr)

    return rate, elapsed_ms
Ejemplo n.º 11
0
def main():
    """Measure how long ``import tensorflow`` takes and report it as JSON.

    The TensorFlow version and package path are printed to stderr; the
    report, including build information gathered by the two helper
    functions, is written to stdout.
    """
    import_began = time.monotonic()
    import tensorflow as tf

    import_done = time.monotonic()

    version_line = "# Version: %s, path: %s" % (tf.__version__, tf.__path__)
    print(version_line, file=sys.stderr)

    measurement = {"elapsed": import_done - import_began}
    aicoe_buildinfo = _get_aicoe_tensorflow_build_info(tf)
    upstream_buildinfo = _get_tensorflow_build_info(tf)

    result = {
        "component": "tensorflow",
        "name": "PiImport",
        "@parameters": {},
        "@result": measurement,
        "tensorflow_aicoe_buildinfo": aicoe_buildinfo,
        "tensorflow_upstream_buildinfo": upstream_buildinfo,
    }
    json.dump(result, sys.stdout, indent=2, sort_keys=True)