Example #1
    def benchmarkLuOp(self):
        for shape in self.shapes:
            with ops.Graph().as_default(), \
                session.Session(config=benchmark.benchmark_config()) as sess, \
                ops.device("/cpu:0"):
                matrix = variables.Variable(self._GenerateMatrix(shape))
                lu, p = linalg_ops.lu(matrix)
                variables.global_variables_initializer().run()
                self.run_op_benchmark(
                    sess,
                    control_flow_ops.group(lu, p),
                    min_iters=25,
                    name="lu_cpu_{shape}".format(shape=shape))

            if test.is_gpu_available(True):
                with ops.Graph().as_default(), \
                    session.Session(config=benchmark.benchmark_config()) as sess, \
                    ops.device("/device:GPU:0"):
                    matrix = variables.Variable(self._GenerateMatrix(shape))
                    lu, p = linalg_ops.lu(matrix)
                    variables.global_variables_initializer().run()
                    self.run_op_benchmark(
                        sess,
                        control_flow_ops.group(lu, p),
                        min_iters=25,
                        name="lu_gpu_{shape}".format(shape=shape))
    def benchmarkMatrixInverseOp(self):
        for adjoint in False, True:
            for shape in self.shapes:
                with ops.Graph().as_default(), \
                    session.Session(config=benchmark.benchmark_config()) as sess, \
                    ops.device("/cpu:0"):
                    matrix = self._GenerateMatrix(shape)
                    inv = linalg_ops.matrix_inverse(matrix, adjoint=adjoint)
                    self.evaluate(variables.global_variables_initializer())
                    self.run_op_benchmark(
                        sess,
                        control_flow_ops.group(inv),
                        min_iters=25,
                        name="matrix_inverse_cpu_{shape}_adjoint_{adjoint}".
                        format(shape=shape, adjoint=adjoint))

                if test.is_gpu_available(True):
                    with ops.Graph().as_default(), \
                        session.Session(config=benchmark.benchmark_config()) as sess, \
                        ops.device("/gpu:0"):
                        matrix = self._GenerateMatrix(shape)
                        inv = linalg_ops.matrix_inverse(matrix,
                                                        adjoint=adjoint)
                        self.evaluate(variables.global_variables_initializer())
                        self.run_op_benchmark(
                            sess,
                            control_flow_ops.group(inv),
                            min_iters=25,
                            name="matrix_inverse_gpu_{shape}_adjoint_{adjoint}"
                            .format(shape=shape, adjoint=adjoint))
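Every snippet in this listing follows the same TF1 graph-mode recipe: build the op inside a fresh ops.Graph(), open a session.Session configured with benchmark.benchmark_config() (an internal helper that, roughly, disables graph rewrites that could prune the ops being timed), pin the graph to a device, run the variable initializer, and time the op with tf.test.Benchmark.run_op_benchmark. The sketch below restates that recipe using only the public tf.compat.v1 API; the matmul workload, the shapes, and the disable_meta_optimizer stand-in for benchmark_config() are illustrative assumptions, not code taken from these examples.

# A minimal, self-contained sketch of the pattern shared by the snippets in
# this listing, written against the public tf.compat.v1 API.  The workload and
# the ConfigProto tweak standing in for benchmark.benchmark_config() are
# assumptions for illustration.
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()


class MatmulBenchmark(tf.test.Benchmark):

  def benchmarkMatmulOp(self):
    config = tf.ConfigProto()
    # Blunt stand-in for benchmark_config(): keep grappler from rewriting or
    # pruning the op under test.
    config.graph_options.rewrite_options.disable_meta_optimizer = True
    for shape in [(256, 256), (1024, 1024)]:
      with tf.Graph().as_default(), \
          tf.Session(config=config) as sess, \
          tf.device("/cpu:0"):
        matrix = tf.Variable(np.random.rand(*shape).astype(np.float32))
        product = tf.matmul(matrix, matrix)
        sess.run(tf.global_variables_initializer())
        self.run_op_benchmark(
            sess,
            tf.group(product),
            min_iters=25,
            name="matmul_cpu_{shape}".format(shape=shape))


if __name__ == "__main__":
  tf.test.main()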
Example #3
    def benchmarkMatrixExponentialOp(self):
        for shape in self.shapes:
            with ops.Graph().as_default(), \
                session.Session(config=benchmark.benchmark_config()) as sess, \
                ops.device("/cpu:0"):
                matrix = self._GenerateMatrix(shape)
                expm = linalg_impl.matrix_exponential(matrix)
                variables.global_variables_initializer().run()
                self.run_op_benchmark(
                    sess,
                    control_flow_ops.group(expm),
                    min_iters=25,
                    name="matrix_exponential_cpu_{shape}".format(shape=shape))

            if test.is_gpu_available(True):
                with ops.Graph().as_default(), \
                    session.Session(config=benchmark.benchmark_config()) as sess, \
                    ops.device("/gpu:0"):
                    matrix = self._GenerateMatrix(shape)
                    expm = linalg_impl.matrix_exponential(matrix)
                    variables.global_variables_initializer().run()
                    self.run_op_benchmark(
                        sess,
                        control_flow_ops.group(expm),
                        min_iters=25,
                        name="matrix_exponential_gpu_{shape}".format(
                            shape=shape))
Example #4
  def benchmarkQROp(self):
    for shape_ in self.shapes:
      with ops.Graph().as_default(), \
          session.Session(config=benchmark.benchmark_config()) as sess, \
          ops.device("/cpu:0"):
        matrix_value = np.random.uniform(
            low=-1.0, high=1.0, size=shape_).astype(np.float32)
        matrix = variables.Variable(matrix_value)
        q, r = linalg_ops.qr(matrix)
        variables.global_variables_initializer().run()
        self.run_op_benchmark(
            sess,
            control_flow_ops.group(q, r),
            min_iters=25,
            name="QR_cpu_{shape}".format(shape=shape_))

      if test.is_gpu_available(True):
        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/device:GPU:0"):
          matrix_value = np.random.uniform(
              low=-1.0, high=1.0, size=shape_).astype(np.float32)
          matrix = variables.Variable(matrix_value)
          q, r = linalg_ops.qr(matrix)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(q, r),
              min_iters=25,
              name="QR_gpu_{shape}".format(shape=shape_))
  def benchmarkMatrixBandPartOp(self):
    for shape_ in self.shapes:
      for limits in (-1, -1), (-1, 0), (0, -1), (2, 2):
        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/cpu:0"):
          matrix = variables.Variable(array_ops.ones(shape_))
          band = array_ops.matrix_band_part(matrix, limits[0], limits[1])
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(band),
              min_iters=10,
              name="matrix_band_part_cpu_{shape}_{limits}".format(
                  shape=shape_, limits=limits))

        if test_lib.is_gpu_available(True):
          with ops.Graph().as_default(), \
              session.Session(config=benchmark.benchmark_config()) as sess, \
              ops.device("/gpu:0"):
            matrix = variables.Variable(array_ops.ones(shape_))
            band = array_ops.matrix_band_part(matrix, limits[0], limits[1])
            variables.global_variables_initializer().run()
            self.run_op_benchmark(
                sess,
                control_flow_ops.group(band),
                min_iters=10,
                name="matrix_band_part_gpu_{shape}_{limits}".format(
                    shape=shape_, limits=limits))
Example #6
  def benchmarkQROp(self):
    for shape_ in self.shapes:
      with ops.Graph().as_default(), \
          session.Session(config=benchmark.benchmark_config()) as sess, \
          ops.device("/cpu:0"):
        matrix_value = np.random.uniform(
            low=-1.0, high=1.0, size=shape_).astype(np.float32)
        matrix = variables.Variable(matrix_value)
        q, r = linalg_ops.qr(matrix)
        variables.global_variables_initializer().run()
        self.run_op_benchmark(
            sess,
            control_flow_ops.group(q, r),
            min_iters=25,
            name="QR_cpu_{shape}".format(shape=shape_))

      if test.is_gpu_available(True):
        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/device:GPU:0"):
          matrix_value = np.random.uniform(
              low=-1.0, high=1.0, size=shape_).astype(np.float32)
          matrix = variables.Variable(matrix_value)
          q, r = linalg_ops.qr(matrix)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(q, r),
              min_iters=25,
              name="QR_gpu_{shape}".format(shape=shape_))
  def benchmarkMatrixDeterminantOp(self):
    for shape in self.shapes:
      with ops.Graph().as_default(), session.Session(
          config=benchmark.benchmark_config()) as sess, ops.device("/cpu:0"):
        matrix = self._GenerateMatrix(shape)
        d = linalg_ops.matrix_determinant(matrix)
        variables.global_variables_initializer().run()
        self.run_op_benchmark(
            sess,
            control_flow_ops.group(d),
            min_iters=25,
            name="matrix_determinant_cpu_{shape}".format(shape=shape))

      if test.is_gpu_available(True):
        with ops.Graph().as_default(), session.Session(
            config=benchmark.benchmark_config()) as sess, ops.device("/gpu:0"):
          matrix = self._GenerateMatrix(shape)
          d = linalg_ops.matrix_determinant(matrix)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(d),
              min_iters=25,
              name="matrix_determinant_gpu_{shape}".format(shape=shape))
  def benchmarkMatrixSolveLsOp(self):
    run_gpu_test = test_lib.is_gpu_available(True)
    regularizer = 1.0
    for matrix_shape in self.matrix_shapes:
      for num_rhs in 1, 2, matrix_shape[-1]:

        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/cpu:0"):
          matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
          x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
          self.evaluate(variables.global_variables_initializer())
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(x),
              min_iters=25,
              store_memory_usage=False,
              name=("matrix_solve_ls_cpu_shape_{matrix_shape}_num_rhs_{num_rhs}"
                   ).format(matrix_shape=matrix_shape, num_rhs=num_rhs))

        if run_gpu_test and (len(matrix_shape) < 3 or matrix_shape[0] < 513):
          with ops.Graph().as_default(), \
                session.Session(config=benchmark.benchmark_config()) as sess, \
                ops.device("/gpu:0"):
            matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
            x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
            self.evaluate(variables.global_variables_initializer())
            self.run_op_benchmark(
                sess,
                control_flow_ops.group(x),
                min_iters=25,
                store_memory_usage=False,
                name=("matrix_solve_ls_gpu_shape_{matrix_shape}_num_rhs_"
                      "{num_rhs}").format(
                          matrix_shape=matrix_shape, num_rhs=num_rhs))
    def benchmarkMatrixDeterminantOp(self):
        for shape in self.shapes:
            with ops.Graph().as_default(), session.Session(
                    config=benchmark.benchmark_config()) as sess, ops.device(
                        "/cpu:0"):
                matrix = self._GenerateMatrix(shape)
                d = linalg_ops.matrix_determinant(matrix)
                self.evaluate(variables.global_variables_initializer())
                self.run_op_benchmark(
                    sess,
                    control_flow_ops.group(d, ),
                    min_iters=25,
                    name="matrix_determinant_cpu_{shape}".format(shape=shape))

            if test.is_gpu_available(True):
                with ops.Graph().as_default(), session.Session(
                        config=benchmark.benchmark_config(
                        )) as sess, ops.device("/gpu:0"):
                    matrix = self._GenerateMatrix(shape)
                    d = linalg_ops.matrix_determinant(matrix)
                    self.evaluate(variables.global_variables_initializer())
                    self.run_op_benchmark(
                        sess,
                        control_flow_ops.group(d, ),
                        min_iters=25,
                        name="matrix_determinant_gpu_{shape}".format(
                            shape=shape))
  def benchmarkMatrixSolveLsOp(self):
    run_gpu_test = test_lib.is_gpu_available(True)
    regularizer = 1.0
    for matrix_shape in self.matrix_shapes:
      for num_rhs in 1, 2, matrix_shape[-1]:

        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/cpu:0"):
          matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
          x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(x),
              min_iters=25,
              store_memory_usage=False,
              name=("matrix_solve_ls_cpu_shape_{matrix_shape}_num_rhs_{num_rhs}"
                   ).format(matrix_shape=matrix_shape, num_rhs=num_rhs))

        if run_gpu_test and (len(matrix_shape) < 3 or matrix_shape[0] < 513):
          with ops.Graph().as_default(), \
                session.Session(config=benchmark.benchmark_config()) as sess, \
                ops.device("/gpu:0"):
            matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
            x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
            variables.global_variables_initializer().run()
            self.run_op_benchmark(
                sess,
                control_flow_ops.group(x),
                min_iters=25,
                store_memory_usage=False,
                name=("matrix_solve_ls_gpu_shape_{matrix_shape}_num_rhs_"
                      "{num_rhs}").format(
                          matrix_shape=matrix_shape, num_rhs=num_rhs))
Example #11
  def benchmarkCholeskyOp(self):
    for shape in self.shapes:
      with ops.Graph().as_default(), \
          session.Session(config=benchmark.benchmark_config()) as sess, \
          ops.device("/cpu:0"):
        matrix = variables.Variable(self._GenerateMatrix(shape))
        l = linalg_ops.cholesky(matrix)
        variables.global_variables_initializer().run()
        self.run_op_benchmark(
            sess,
            control_flow_ops.group(l),
            min_iters=25,
            name="cholesky_cpu_{shape}".format(shape=shape))

      if test.is_gpu_available(True):
        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/device:GPU:0"):
          matrix = variables.Variable(self._GenerateMatrix(shape))
          l = linalg_ops.cholesky(matrix)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(l),
              min_iters=25,
              name="cholesky_gpu_{shape}".format(shape=shape))
  def benchmarkMatrixInverseOp(self):
    for adjoint in False, True:
      for shape in self.shapes:
        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/cpu:0"):
          matrix = self._GenerateMatrix(shape)
          inv = linalg_ops.matrix_inverse(matrix, adjoint=adjoint)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(inv),
              min_iters=25,
              name="matrix_inverse_cpu_{shape}_adjoint_{adjoint}".format(
                  shape=shape, adjoint=adjoint))

        if test.is_gpu_available(True):
          with ops.Graph().as_default(), \
              session.Session(config=benchmark.benchmark_config()) as sess, \
              ops.device("/gpu:0"):
            matrix = self._GenerateMatrix(shape)
            inv = linalg_ops.matrix_inverse(matrix, adjoint=adjoint)
            variables.global_variables_initializer().run()
            self.run_op_benchmark(
                sess,
                control_flow_ops.group(inv),
                min_iters=25,
                name="matrix_inverse_gpu_{shape}_adjoint_{adjoint}".format(
                    shape=shape, adjoint=adjoint))
Example #13
    def benchmarkMatrixBandPartOp(self):
        for shape_ in self.shapes:
            for limits in (-1, -1), (-1, 0), (0, -1), (2, 2):
                with ops.Graph().as_default(), \
                    session.Session(config=benchmark.benchmark_config()) as sess, \
                    ops.device("/cpu:0"):
                    matrix = variables.Variable(array_ops.ones(shape_))
                    band = array_ops.matrix_band_part(matrix, limits[0],
                                                      limits[1])
                    variables.global_variables_initializer().run()
                    self.run_op_benchmark(
                        sess,
                        control_flow_ops.group(band),
                        min_iters=10,
                        name="matrix_band_part_cpu_{shape}_{limits}".format(
                            shape=shape_, limits=limits))

                if test_lib.is_gpu_available(True):
                    with ops.Graph().as_default(), \
                        session.Session(config=benchmark.benchmark_config()) as sess, \
                        ops.device("/gpu:0"):
                        matrix = variables.Variable(array_ops.ones(shape_))
                        band = array_ops.matrix_band_part(
                            matrix, limits[0], limits[1])
                        variables.global_variables_initializer().run()
                        self.run_op_benchmark(
                            sess,
                            control_flow_ops.group(band),
                            min_iters=10,
                            name="matrix_band_part_gpu_{shape}_{limits}".
                            format(shape=shape_, limits=limits))
Example #14
    def benchmarkMatrixSolveOp(self):
        run_gpu_test = test.is_gpu_available(True)
        for adjoint in False, True:
            for matrix_shape in self.matrix_shapes:
                for num_rhs in 1, 2, matrix_shape[-1]:

                    with ops.Graph().as_default(), \
                        session.Session(config=benchmark.benchmark_config()) as sess, \
                        ops.device("/cpu:0"):
                        matrix, rhs = self._GenerateTestData(
                            matrix_shape, num_rhs)
                        x = linalg_ops.matrix_solve(matrix,
                                                    rhs,
                                                    adjoint=adjoint)
                        self.evaluate(variables.global_variables_initializer())
                        self.run_op_benchmark(
                            sess,
                            control_flow_ops.group(x),
                            min_iters=25,
                            store_memory_usage=False,
                            name=
                            ("matrix_solve_cpu_shape_{matrix_shape}_num_rhs_{num_rhs}_"
                             "adjoint_{adjoint}").format(
                                 matrix_shape=matrix_shape,
                                 num_rhs=num_rhs,
                                 adjoint=adjoint))

                    if run_gpu_test:
                        with ops.Graph().as_default(), \
                            session.Session(config=benchmark.benchmark_config()) as sess, \
                            ops.device("/gpu:0"):
                            matrix, rhs = self._GenerateTestData(
                                matrix_shape, num_rhs)
                            x = linalg_ops.matrix_solve(matrix,
                                                        rhs,
                                                        adjoint=adjoint)
                            self.evaluate(
                                variables.global_variables_initializer())
                            self.run_op_benchmark(
                                sess,
                                control_flow_ops.group(x),
                                min_iters=25,
                                store_memory_usage=False,
                                name=
                                ("matrix_solve_gpu_shape_{matrix_shape}_num_rhs_"
                                 "{num_rhs}_adjoint_{adjoint}").format(
                                     matrix_shape=matrix_shape,
                                     num_rhs=num_rhs,
                                     adjoint=adjoint))
    def benchmarkTridiagonalMulOp(self):
      devices = [('/cpu:0', 'cpu')]
      if test.is_gpu_available(cuda_only=True):
        devices += [('/gpu:0', 'gpu')]

      for device_option, size_option in itertools.product(devices, self.sizes):
        device_id, device_name = device_option
        m, batch_size, n = size_option

        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device(device_id):
          upper, diag, lower, vec = self._generateData(batch_size, m, n)
          x1 = self.baseline(upper, diag, lower, vec)
          x2 = linalg_impl.tridiagonal_matmul((upper, diag, lower),
                                              vec,
                                              diagonals_format='sequence')

          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(x1),
              min_iters=10,
              store_memory_usage=False,
              name=('tridiagonal_matmul_baseline_%s'
                    '_batch_size_%d_m_%d_n_%d' %
                    (device_name, batch_size, m, n)))

          self.run_op_benchmark(
              sess,
              control_flow_ops.group(x2),
              min_iters=10,
              store_memory_usage=False,
              name=('tridiagonal_matmul_%s_batch_size_%d_m_%d_n_%d' %
                    (device_name, batch_size, m, n)))
    def benchmarkTridiagonalSolveOp(self):
      devices = [("/cpu:0", "cpu")]
      if test.is_gpu_available(cuda_only=True):
        devices += [("/gpu:0", "gpu")]

      for device_option, pivoting_option, size_option in \
          itertools.product(devices, self.pivoting_options, self.sizes):

        device_id, device_name = device_option
        pivoting, pivoting_name = pivoting_option
        matrix_size, batch_size, num_rhs = size_option

        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device(device_id):
          diags, rhs = self._generateData(matrix_size, batch_size, num_rhs)
          x = linalg_impl.tridiagonal_solve(
              diags, rhs, partial_pivoting=pivoting)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(x),
              min_iters=10,
              store_memory_usage=False,
              name=("tridiagonal_solve_{}_matrix_size_{}_batch_size_{}_"
                    "num_rhs_{}_{}").format(device_name, matrix_size,
                                            batch_size, num_rhs, pivoting_name))
  def benchmarkBatchMatMulBroadcast(self):
    for (a_shape, b_shape) in self.shape_pairs:
      with compat.forward_compatibility_horizon(2019, 4, 26):
        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/cpu:0"):
          matrix_a = variables.Variable(
              GetRandomNormalInput(a_shape, np.float32))
          matrix_b = variables.Variable(
              GetRandomNormalInput(b_shape, np.float32))
          variables.global_variables_initializer().run()

          # Use batch matmul op's internal broadcasting.
          self.run_op_benchmark(
              sess,
              math_ops.matmul(matrix_a, matrix_b),
              min_iters=50,
              name="batch_matmul_cpu_{}_{}".format(a_shape, b_shape))

          # Manually broadcast the input matrices using the broadcast_to op.
          broadcasted_batch_shape = array_ops.broadcast_static_shape(
              matrix_a.shape[:-2], matrix_b.shape[:-2])
          broadcasted_a_shape = broadcasted_batch_shape.concatenate(
              matrix_a.shape[-2:])
          broadcasted_b_shape = broadcasted_batch_shape.concatenate(
              matrix_b.shape[-2:])
          self.run_op_benchmark(
              sess,
              math_ops.matmul(
                  array_ops.broadcast_to(matrix_a, broadcasted_a_shape),
                  array_ops.broadcast_to(matrix_b, broadcasted_b_shape)),
              min_iters=50,
              name="batch_matmul_manual_broadcast_cpu_{}_{}".format(
                  a_shape, b_shape))
Example #18
    def benchmark_einsum(self):
        for equation, dim in self.cases:
            with ops.Graph().as_default(), \
                session.Session(config=benchmark.benchmark_config()) as sess, \
                ops.device('/cpu:0'):
                r = np.random.RandomState(0)
                input_subscripts = equation.split('->')[0].split(',')
                input_vars = []
                for subscript in input_subscripts:
                    input_shape = (dim, ) * len(subscript)
                    input_vars.append(
                        variables.Variable(
                            np.array(r.randn(*input_shape), np.float32)))
                variables.global_variables_initializer().run()

                if len(input_vars) <= 2:
                    self.run_op_benchmark(
                        sess,
                        special_math_ops.einsum(equation, *input_vars),
                        min_iters=50,
                        name='einsum_cpu_({})_{}'.format(equation, dim))
                else:
                    for optimize in ['greedy', 'auto']:
                        self.run_op_benchmark(
                            sess,
                            special_math_ops.einsum(equation,
                                                    *input_vars,
                                                    optimize=optimize),
                            min_iters=50,
                            name='einsum_cpu_({})_{}_{}'.format(
                                equation, optimize, dim))
    def benchmarkTridiagonalSolveOp(self):
      devices = [("/cpu:0", "cpu")]
      if test.is_gpu_available(cuda_only=True):
        devices += [("/gpu:0", "gpu")]

      for device_option, pivoting_option, size_option in \
          itertools.product(devices, self.pivoting_options, self.sizes):

        device_id, device_name = device_option
        pivoting, pivoting_name = pivoting_option
        matrix_size, batch_size, num_rhs = size_option

        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device(device_id):
          diags, rhs = self._generateData(matrix_size, batch_size, num_rhs)
          x = linalg_impl.tridiagonal_solve(
              diags, rhs, partial_pivoting=pivoting)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(x),
              min_iters=10,
              store_memory_usage=False,
              name=("tridiagonal_solve_{}_matrix_size_{}_batch_size_{}_"
                    "num_rhs_{}_{}").format(device_name, matrix_size,
                                            batch_size, num_rhs, pivoting_name))
Example #20
    def benchmarkBatchMatMulBroadcast(self):
        for (a_shape, b_shape) in self.shape_pairs:
            with ops.Graph().as_default(), \
                session.Session(config=benchmark.benchmark_config()) as sess, \
                ops.device("/cpu:0"):
                matrix_a = variables.Variable(
                    GetRandomNormalInput(a_shape, np.float32))
                matrix_b = variables.Variable(
                    GetRandomNormalInput(b_shape, np.float32))
                variables.global_variables_initializer().run()

                # Use batch matmul op's internal broadcasting.
                self.run_op_benchmark(sess,
                                      math_ops.matmul(matrix_a, matrix_b),
                                      min_iters=50,
                                      name="batch_matmul_cpu_{}_{}".format(
                                          a_shape, b_shape))

                # Manually broadcast the input matrices using the broadcast_to op.
                broadcasted_batch_shape = array_ops.broadcast_static_shape(
                    matrix_a.shape[:-2], matrix_b.shape[:-2])
                broadcasted_a_shape = broadcasted_batch_shape.concatenate(
                    matrix_a.shape[-2:])
                broadcasted_b_shape = broadcasted_batch_shape.concatenate(
                    matrix_b.shape[-2:])
                self.run_op_benchmark(
                    sess,
                    math_ops.matmul(
                        array_ops.broadcast_to(matrix_a, broadcasted_a_shape),
                        array_ops.broadcast_to(matrix_b, broadcasted_b_shape)),
                    min_iters=50,
                    name="batch_matmul_manual_broadcast_cpu_{}_{}".format(
                        a_shape, b_shape))
Example #21
 def benchmarkBatchSelect(self):
     for (m, n, use_gpu) in itertools.product([1000, 10000, 100000],
                                              [10, 100, 1000],
                                              [False, True]):
         name = "m_%d_n_%d_use_gpu_%s" % (m, n, use_gpu)
         device = "/%s:0" % ("gpu" if use_gpu else "cpu")
         with ops.Graph().as_default():
             with ops.device(device):
                 x_gen = random_ops.random_uniform([m, n],
                                                   dtype=dtypes.float32)
                 y_gen = random_ops.random_uniform([m, n],
                                                   dtype=dtypes.float32)
                 c_gen = random_ops.random_uniform(
                     [m], dtype=dtypes.float32) <= 0.5
                 x = resource_variable_ops.ResourceVariable(x_gen)
                 y = resource_variable_ops.ResourceVariable(y_gen)
                 c = resource_variable_ops.ResourceVariable(c_gen)
                 op = array_ops.where(c, x, y)
             with session.Session(
                     config=benchmark.benchmark_config()) as sess:
                 self.evaluate(x.initializer)
                 self.evaluate(y.initializer)
                 self.evaluate(c.initializer)
                 r = self.run_op_benchmark(sess,
                                           op,
                                           min_iters=100,
                                           name=name)
                 # approximate size of output: m*n*2 floats for each axis.
                 gb_processed = m * n * 8 / 1.0e9
                 throughput = gb_processed / r["wall_time"]
                 print("Benchmark: %s \t wall_time: %0.03g s \t "
                       "Throughput: %0.03g GB/s" %
                       (name, r["wall_time"], throughput))
                 sys.stdout.flush()
        def _benchmark(self, generate_data_fn, test_name_format_string):
            devices = [("/cpu:0", "cpu")]
            if test.is_gpu_available(cuda_only=True):
                devices += [("/gpu:0", "gpu")]

            for device_option, pivoting_option, size_option in \
                itertools.product(devices, self.pivoting_options, self.sizes):

                device_id, device_name = device_option
                pivoting, pivoting_name = pivoting_option
                matrix_size, batch_size, num_rhs = size_option

                with ops.Graph().as_default(), \
                    session.Session(config=benchmark.benchmark_config()) as sess, \
                    ops.device(device_id):
                    diags, rhs = generate_data_fn(matrix_size, batch_size,
                                                  num_rhs)
                    # Pivoting is not supported by XLA backends.
                    if test.is_xla_enabled() and pivoting:
                        return
                    x = linalg_impl.tridiagonal_solve(
                        diags, rhs, partial_pivoting=pivoting)
                    self.evaluate(variables.global_variables_initializer())
                    self.run_op_benchmark(sess,
                                          control_flow_ops.group(x),
                                          min_iters=10,
                                          store_memory_usage=False,
                                          name=test_name_format_string.format(
                                              device_name, matrix_size,
                                              batch_size, num_rhs,
                                              pivoting_name))
Example #23
 def benchmarkWhere(self):
     for (m, n, p, use_gpu) in itertools.product(
         [10], [10, 100, 1000, 10000, 100000, 1000000], [0.01, 0.5, 0.99],
         [False, True]):
         name = "m_%d_n_%d_p_%g_use_gpu_%s" % (m, n, p, use_gpu)
         device = "/%s:0" % ("gpu" if use_gpu else "cpu")
         with ops.Graph().as_default():
             with ops.device(device):
                 x = random_ops.random_uniform(
                     (m, n), dtype=dtypes.float32) <= p
                 v = resource_variable_ops.ResourceVariable(x)
                 op = array_ops.where(v)
             with session.Session(
                     config=benchmark.benchmark_config()) as sess:
                 self.evaluate(v.initializer)
                 r = self.run_op_benchmark(sess,
                                           op,
                                           min_iters=100,
                                           name=name)
                 gb_processed_input = m * n / 1.0e9
                 # approximate size of output: m*n*p int64s for each axis.
                 gb_processed_output = 2 * 8 * m * n * p / 1.0e9
                 gb_processed = gb_processed_input + gb_processed_output
                 throughput = gb_processed / r["wall_time"]
                 print("Benchmark: %s \t wall_time: %0.03g s \t "
                       "Throughput: %0.03g GB/s" %
                       (name, r["wall_time"], throughput))
                 sys.stdout.flush()
    def benchmarkEinsum(self):
        for equation, dim in self.cases:
            with ops.Graph().as_default(), \
                session.Session(config=benchmark.benchmark_config()) as sess, \
                ops.device('/cpu:0'):
                r = np.random.RandomState(0)
                input_subscripts = equation.split('->')[0].split(',')
                input_vars = []
                for subscript in input_subscripts:
                    input_shape = (dim, ) * len(subscript)
                    input_vars.append(
                        variables.Variable(
                            np.array(r.randn(*input_shape), np.float32)))
                self.evaluate(variables.global_variables_initializer())

                # Call einsum_v1.
                self.run_op_benchmark(
                    sess,
                    special_math_ops.einsum(equation, *input_vars),
                    min_iters=50,
                    name='einsum_v1_cpu_({})_{}'.format(equation, dim))

                # Call gen_linalg_ops.einsum.
                self.run_op_benchmark(
                    sess,
                    gen_linalg_ops.einsum(input_vars, equation),
                    min_iters=50,
                    name='einsum_v2_cpu_({})_{}'.format(equation, dim))
        def benchmarkTridiagonalMulOp(self):
            devices = [('/cpu:0', 'cpu')]

            for device_id, device_name in devices:
                for batch_size, matrix_size in self.sizes:
                    with ops.Graph().as_default(), \
                        session.Session(config=benchmark.benchmark_config()) as sess, \
                        ops.device(device_id):
                        upper, diag, lower, vec = self._generateData(
                            batch_size, matrix_size)
                        x1 = self.baseline(upper, diag, lower, vec)
                        x2 = linalg_impl.tridiagonal_matmul(
                            (upper, diag, lower), vec)
                        variables.global_variables_initializer().run()
                        self.run_op_benchmark(
                            sess,
                            control_flow_ops.group(x1),
                            min_iters=10,
                            store_memory_usage=False,
                            name=('tridiagonal_matmul_baseline_%s'
                                  '_batch_size_%d_matrix_size_%d' %
                                  (device_name, batch_size, matrix_size)))

                        self.run_op_benchmark(
                            sess,
                            control_flow_ops.group(x2),
                            min_iters=10,
                            store_memory_usage=False,
                            name=
                            ('tridiagonal_matmul_%s_batch_size_%d_matrix_size_%d'
                             % (device_name, batch_size, matrix_size)))
 def benchmarkBatchSelect(self):
   for (m, n, use_gpu) in itertools.product([1000, 10000, 100000],
                                            [10, 100, 1000], [False, True]):
     name = "m_%d_n_%d_use_gpu_%s" % (m, n, use_gpu)
     device = "/%s:0" % ("gpu" if use_gpu else "cpu")
     with ops.Graph().as_default():
       with ops.device(device):
         x_gen = random_ops.random_uniform([m, n], dtype=dtypes.float32)
         y_gen = random_ops.random_uniform([m, n], dtype=dtypes.float32)
         c_gen = random_ops.random_uniform([m], dtype=dtypes.float32) <= 0.5
         x = resource_variable_ops.ResourceVariable(x_gen)
         y = resource_variable_ops.ResourceVariable(y_gen)
         c = resource_variable_ops.ResourceVariable(c_gen)
         op = array_ops.where(c, x, y)
       with session.Session(config=benchmark.benchmark_config()) as sess:
         x.initializer.run()
         y.initializer.run()
         c.initializer.run()
         r = self.run_op_benchmark(sess, op, min_iters=100, name=name)
         # approximate size of output: m*n*2 floats for each axis.
         gb_processed = m * n * 8 / 1.0e9
         throughput = gb_processed / r["wall_time"]
         print("Benchmark: %s \t wall_time: %0.03g s \t "
               "Throughput: %0.03g GB/s" % (name, r["wall_time"], throughput))
         sys.stdout.flush()
Example #27
 def benchmark_times_an_op(self):
     with session.Session(config=benchmark.benchmark_config()) as sess:
         a = constant_op.constant(0.0)
         a_plus_a = a + a
         return self.run_op_benchmark(sess,
                                      a_plus_a,
                                      min_iters=1000,
                                      store_trace=True,
                                      name="op_benchmark")
Example #28
 def benchmark_times_an_op(self):
     input_size = 5
     with session.Session(config=benchmark.benchmark_config()) as sess:
         a = array_ops.placeholder(dtype=dtypes.float32, shape=(input_size))
         a_plus_a = a + a
         return self.run_op_benchmark(sess,
                                      a_plus_a,
                                      feed_dict={a: np.arange(input_size)},
                                      min_iters=1000,
                                      store_trace=True,
                                      name="op_benchmark")
Example #29
 def benchmark_times_an_op(self):
   input_size = 5
   with session.Session(config=benchmark.benchmark_config()) as sess:
     a = array_ops.placeholder(dtype=dtypes.float32, shape=(input_size))
     a_plus_a = a + a
     return self.run_op_benchmark(
         sess,
         a_plus_a,
         feed_dict={a: np.arange(input_size)},
         min_iters=1000,
         store_trace=True,
         name="op_benchmark")
Example #30
    def _run_and_report_graphmode(self, fn, iters, burn_iters, benchmark_name,
                                  xprof_enabled, **kwargs):
        """Runs and reports benchmarks in graph mode."""
        if self.input_data is None:
            raise ValueError('Input data is missing for {} benchmark'.format(
                benchmark_name))

        # Uses the benchmark config to disable the static graph optimizations
        with session.Session(config=benchmark.benchmark_config()) as sess:
            if hasattr(self, 'iterator'):
                sess.run(self.iterator.initializer)

            sess.run(lookup_ops.tables_initializer())
            sess.run(variables_lib.global_variables_initializer())

            inputs = sess.run(self.input_data)

            @def_function.function
            def benchmark_op(data):
                return fn(data, **kwargs)

            def run_benchmark():
                for _ in range(burn_iters):
                    sess.run(benchmark_op(inputs))
                total_time = 0
                for _ in range(iters):
                    start_time = time.time()
                    sess.run(benchmark_op(inputs))
                    total_time += time.time() - start_time

                return total_time

            total_time = run_benchmark()
            mean_time = total_time / iters
            extras = {'sec_per_batch': mean_time}

            metrics = []
            if hasattr(self, 'batch_number'):
                extras.update(
                    {'batches_per_sec': self.batch_number / mean_time})
                metrics.append({
                    'name': 'batches_per_sec',
                    'value': self.batch_number / mean_time
                })

            if xprof_enabled:
                extras.update(self._run_with_xprof(run_benchmark))

            self.report_benchmark(wall_time=mean_time,
                                  name=benchmark_name + '_graph',
                                  extras=extras,
                                  metrics=metrics)
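A hedged sketch of how a concrete benchmark method might drive _run_and_report_graphmode above: the reduce_sum workload, the way input_data and batch_number are populated, and the forwarded axis keyword are assumptions for illustration; only the helper's signature and its expectation that self.input_data is already set come from the code above. The sketch assumes the same internal imports (np, constant_op, math_ops) used elsewhere in these snippets.

    def benchmarkReduceSumGraph(self):
        # Hypothetical driver for the helper above.  The constant input, the
        # batch_number attribute, and the axis keyword (forwarded to fn via
        # **kwargs) are illustrative assumptions.
        self.input_data = constant_op.constant(
            np.random.rand(1024, 256).astype(np.float32))
        self.batch_number = 1024
        self._run_and_report_graphmode(
            math_ops.reduce_sum,
            iters=100,
            burn_iters=10,
            benchmark_name='reduce_sum_1024x256',
            xprof_enabled=False,
            axis=1)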
Example #31
 def benchmarkMatrixLogarithmOp(self):
     for shape in self.shapes:
         with ops.Graph().as_default(), \
             session.Session(config=benchmark.benchmark_config()) as sess, \
             ops.device("/cpu:0"):
             matrix = self._GenerateMatrix(shape)
             logm = gen_linalg_ops.matrix_logarithm(matrix)
             variables.global_variables_initializer().run()
             self.run_op_benchmark(
                 sess,
                 control_flow_ops.group(logm),
                 min_iters=25,
                 name="matrix_logarithm_cpu_{shape}".format(shape=shape))
  def benchmarkMatrixSolveOp(self):
    run_gpu_test = test.is_gpu_available(True)
    for adjoint in False, True:
      for matrix_shape in self.matrix_shapes:
        for num_rhs in 1, 2, matrix_shape[-1]:

          with ops.Graph().as_default(), \
              session.Session(config=benchmark.benchmark_config()) as sess, \
              ops.device("/cpu:0"):
            matrix, rhs = self._GenerateTestData(matrix_shape, num_rhs)
            x = linalg_ops.matrix_solve(matrix, rhs, adjoint=adjoint)
            variables.global_variables_initializer().run()
            self.run_op_benchmark(
                sess,
                control_flow_ops.group(x),
                min_iters=25,
                store_memory_usage=False,
                name=("matrix_solve_cpu_shape_{matrix_shape}_num_rhs_{num_rhs}_"
                      "adjoint_{adjoint}").format(
                          matrix_shape=matrix_shape,
                          num_rhs=num_rhs,
                          adjoint=adjoint))

          if run_gpu_test:
            with ops.Graph().as_default(), \
                session.Session(config=benchmark.benchmark_config()) as sess, \
                ops.device("/gpu:0"):
              matrix, rhs = self._GenerateTestData(matrix_shape, num_rhs)
              x = linalg_ops.matrix_solve(matrix, rhs, adjoint=adjoint)
              variables.global_variables_initializer().run()
              self.run_op_benchmark(
                  sess,
                  control_flow_ops.group(x),
                  min_iters=25,
                  store_memory_usage=False,
                  name=("matrix_solve_gpu_shape_{matrix_shape}_num_rhs_"
                        "{num_rhs}_adjoint_{adjoint}").format(
                            matrix_shape=matrix_shape, num_rhs=num_rhs,
                            adjoint=adjoint))
 def benchmarkMatrixLogarithmOp(self):
   for shape in self.shapes:
     with ops.Graph().as_default(), \
         session.Session(config=benchmark.benchmark_config()) as sess, \
         ops.device("/cpu:0"):
       matrix = self._GenerateMatrix(shape)
       logm = gen_linalg_ops.matrix_logarithm(matrix)
       variables.global_variables_initializer().run()
       self.run_op_benchmark(
           sess,
           control_flow_ops.group(logm),
           min_iters=25,
           name="matrix_logarithm_cpu_{shape}".format(
               shape=shape))
Example #34
    def benchmarkVeryLarge2DFloatSparseTensor(self):
        np.random.seed(127)
        num_elements = 10000
        batch_size = 64
        indices_batch = np.random.randint(batch_size,
                                          size=num_elements,
                                          dtype=np.int64)
        indices_value = np.arange(num_elements, dtype=np.int64)
        indices = np.asarray(sorted(zip(indices_batch, indices_value)),
                             dtype=np.int64)
        values = ["feature_value_for_embedding_lookup"] * num_elements
        shape = np.asarray([batch_size, num_elements], dtype=np.int64)
        with session.Session(config=benchmark.benchmark_config()) as sess:
            with ops.device("/cpu:0"):
                indices = variables.Variable(indices)
                values = variables.Variable(values)
                shape = variables.Variable(shape)
                st = sparse_tensor_lib.SparseTensor(indices, values, shape)

                st_handles = add_many_sparse_to_tensors_map(st)
                st_roundtrip = take_many_sparse_from_tensors_map(
                    sparse_map_op=st_handles.op, sparse_handles=st_handles)
                st_roundtrip_op = st_roundtrip.values.op

                st_serialized = sparse_ops.serialize_many_sparse(st)
                st_deserialized = sparse_ops.deserialize_many_sparse(
                    st_serialized, dtype=values.dtype)
                st_deserialized_op = st_deserialized.values.op

                variables.global_variables_initializer().run()

                st_roundtrip_values = self.evaluate(st_roundtrip)
                st_deserialized_values = self.evaluate(st_deserialized)
                np.testing.assert_equal(st_roundtrip_values.values,
                                        st_deserialized_values.values)
                np.testing.assert_equal(st_roundtrip_values.indices,
                                        st_deserialized_values.indices)
                np.testing.assert_equal(st_roundtrip_values.dense_shape,
                                        st_deserialized_values.dense_shape)

                self.run_op_benchmark(
                    sess,
                    st_roundtrip_op,
                    min_iters=2000,
                    name="benchmark_very_large_2d_float_st_tensor_maps")
                self.run_op_benchmark(
                    sess,
                    st_deserialized_op,
                    min_iters=2000,
                    name="benchmark_very_large_2d_float_st_serialization")
  def benchmarkVeryLarge2DFloatSparseTensor(self):
    np.random.seed(127)
    num_elements = 10000
    batch_size = 64
    indices_batch = np.random.randint(
        batch_size, size=num_elements, dtype=np.int64)
    indices_value = np.arange(num_elements, dtype=np.int64)
    indices = np.asarray(
        sorted(zip(indices_batch, indices_value)), dtype=np.int64)
    values = ["feature_value_for_embedding_lookup"] * num_elements
    shape = np.asarray([batch_size, num_elements], dtype=np.int64)
    with session.Session(config=benchmark.benchmark_config()) as sess:
      with ops.device("/cpu:0"):
        indices = variables.Variable(indices)
        values = variables.Variable(values)
        shape = variables.Variable(shape)
        st = sparse_tensor_lib.SparseTensor(indices, values, shape)

        st_handles = add_many_sparse_to_tensors_map(st)
        st_roundtrip = take_many_sparse_from_tensors_map(
            sparse_map_op=st_handles.op, sparse_handles=st_handles)
        st_roundtrip_op = st_roundtrip.values.op

        st_serialized = sparse_ops.serialize_many_sparse(st)
        st_deserialized = sparse_ops.deserialize_many_sparse(
            st_serialized, dtype=values.dtype)
        st_deserialized_op = st_deserialized.values.op

        variables.global_variables_initializer().run()

        st_roundtrip_values = sess.run(st_roundtrip)
        st_deserialized_values = sess.run(st_deserialized)
        np.testing.assert_equal(st_roundtrip_values.values,
                                st_deserialized_values.values)
        np.testing.assert_equal(st_roundtrip_values.indices,
                                st_deserialized_values.indices)
        np.testing.assert_equal(st_roundtrip_values.dense_shape,
                                st_deserialized_values.dense_shape)

        self.run_op_benchmark(
            sess,
            st_roundtrip_op,
            min_iters=2000,
            name="benchmark_very_large_2d_float_st_tensor_maps")
        self.run_op_benchmark(
            sess,
            st_deserialized_op,
            min_iters=2000,
            name="benchmark_very_large_2d_float_st_serialization")
 def _BenchmarkGrad(grad_fn, name, device):
   for shape in self.shapes:
     matrix = self._GenerateMatrix(shape)
     with ops.Graph().as_default(), \
         session.Session(config=benchmark.benchmark_config()) as sess, \
         ops.device(device):
       l = variables.Variable(np.linalg.cholesky(matrix))
       grad_matrix = variables.Variable(
           np.random.randn(*matrix.shape).astype(np.float32))
       grad = grad_fn(l, grad_matrix)
       variables.global_variables_initializer().run()
       self.run_op_benchmark(
           sess,
           control_flow_ops.group(
               grad,),
           min_iters=25,
           name="{name}_{dev}_{shape}".format(
               name=name, dev=grad.device, shape=shape))
Example #37
 def benchmarkTridiagonalSolveOp(self):
     for matrix_size, batch_size, num_rhs in self.sizes:
         with ops.Graph().as_default(), \
                 session.Session(config=benchmark.benchmark_config()) as sess, \
                 ops.device("/cpu:0"):
             diags, rhs = self._generateData(matrix_size, batch_size,
                                             num_rhs)
             x = linalg_impl.tridiagonal_solve(diags,
                                               rhs,
                                               transpose_rhs=True)
             variables.global_variables_initializer().run()
             self.run_op_benchmark(
                 sess,
                 control_flow_ops.group(x),
                 min_iters=10,
                 store_memory_usage=False,
                 name=("tridiagonal_solve_matrix_size_{}_batch_size_{}_"
                       "num_rhs_{}").format(matrix_size, batch_size,
                                            num_rhs))
Example #38
        def benchmarkTridiagonalMulOp(self):
            devices = [('/cpu:0', 'cpu')]
            if test.is_gpu_available(cuda_only=True):
                devices += [('/gpu:0', 'gpu')]

            for device_option, size_option in itertools.product(
                    devices, self.sizes):
                device_id, device_name = device_option
                m, batch_size, n = size_option

                with ops.Graph().as_default(), \
                    session.Session(config=benchmark.benchmark_config()) as sess, \
                    ops.device(device_id):
                    upper, diag, lower, vec = self._generateData(
                        batch_size, m, n)
                    x1 = self.baseline(upper, diag, lower, vec)
                    x2 = linalg_impl.tridiagonal_matmul(
                        (upper, diag, lower), vec, diagonals_format='sequence')

                    self.evaluate(variables.global_variables_initializer())
                    self.run_op_benchmark(
                        sess,
                        control_flow_ops.group(x1),
                        min_iters=10,
                        store_memory_usage=False,
                        name=('tridiagonal_matmul_baseline_%s'
                              '_batch_size_%d_m_%d_n_%d' %
                              (device_name, batch_size, m, n)))

                    self.run_op_benchmark(
                        sess,
                        control_flow_ops.group(x2),
                        min_iters=10,
                        store_memory_usage=False,
                        name=('tridiagonal_matmul_%s_batch_size_%d_m_%d_n_%d' %
                              (device_name, batch_size, m, n)))
 def benchmarkWhere(self):
   for (m, n, p, use_gpu) in itertools.product(
       [10],
       [10, 100, 1000, 10000, 100000, 1000000],
       [0.01, 0.5, 0.99],
       [False, True]):
     name = "m_%d_n_%d_p_%g_use_gpu_%s" % (m, n, p, use_gpu)
     device = "/%s:0" % ("gpu" if use_gpu else "cpu")
     with ops.Graph().as_default():
       with ops.device(device):
         x = random_ops.random_uniform((m, n), dtype=dtypes.float32) <= p
         v = resource_variable_ops.ResourceVariable(x)
         op = array_ops.where(v)
       with session.Session(config=benchmark.benchmark_config()) as sess:
         v.initializer.run()
         r = self.run_op_benchmark(sess, op, min_iters=100, name=name)
         gb_processed_input = m * n / 1.0e9
         # approximate size of output: m*n*p int64s for each axis.
         gb_processed_output = 2 * 8 * m * n * p / 1.0e9
         gb_processed = gb_processed_input + gb_processed_output
         throughput = gb_processed / r["wall_time"]
         print("Benchmark: %s \t wall_time: %0.03g s \t "
               "Throughput: %0.03g GB/s" % (name, r["wall_time"], throughput))
         sys.stdout.flush()
 def benchmark_unicode_script(self):
   with session.Session(config=benchmark.benchmark_config()) as sess:
     chars = self._generateBenchmarkInput(1000000)
     script = string_ops.unicode_script(chars)
     self.run_op_benchmark(sess, script.op, min_iters=100)
Example #41
 def benchmark_unicode_script(self):
     with session.Session(config=benchmark.benchmark_config()) as sess:
         chars = self._generateBenchmarkInput(1000000)
         script = string_ops.unicode_script(chars)
         self.run_op_benchmark(sess, script.op, min_iters=100)
    def run_benchmark(self,
                      shape=(100, 100),
                      ragged_rank=None,
                      dtype=dtypes.float32,
                      fill=None,
                      default_shape=(),
                      output_shape=None,
                      min_iters=1000):
        """Run a benchmark with the specified configuraiton parameters.

    Args:
      shape: Bounding box for the input ragged tensor.
      ragged_rank: Ragged rank for the input ragged tensor.  Defaults to
        `len(shape)-1`.
      dtype: Data type for the input ragged tensor.
      fill: How full each dimension should be (0-1).  Corresponds 1:1 with
        `shape`.  Defaults to 0.8 for each dimension.
      default_shape: Shape for the default (padding) value.
      output_shape: Output shape -- ragged tensor will be padded or cropped to
        this shape.
      min_iters: Minimum iterations for benchmark.
    """
        if ragged_rank is None:
            ragged_rank = len(shape) - 1
        if fill is None:
            fill = [0.8 for _ in shape]

        # Build the inputs for the op.
        rt_input = self._generateRaggedTensor(shape, ragged_rank, dtype, fill)
        default_value = constant_op.constant(self._generateRaggedTensor(
            default_shape, 0, dtype),
                                             dtype=dtype)

        mbs = np.prod(shape) / (2**20)
        with session.Session(config=benchmark.benchmark_config()) as sess:
            extras = {
                'shape': shape,
                'ragged_rank': ragged_rank,
                'dtype': dtype,
                'fill': fill,
                'default_shape': default_shape
            }
            rt = ragged_factory_ops.constant(rt_input,
                                             dtype,
                                             ragged_rank=ragged_rank)

            # Inputs for with_splits:
            splits_rt_placeholder = ragged_factory_ops.placeholder(
                dtype, ragged_rank, shape[ragged_rank + 1:])
            splits_feed_dict = {splits_rt_placeholder: sess.run(rt)}

            # Inputs for with_rowids:
            rowids_feed_dict = {}
            rowids_rt_placeholder = rebuild_ragged_tensor_with_value_rowids(
                rt, rowids_feed_dict, sess)

            # Common arguments for benchmarks:
            run_op_benchmark_kwargs = dict(sess=sess,
                                           store_memory_usage=True,
                                           min_iters=min_iters,
                                           burn_iters=max(5, min_iters // 10),
                                           mbs=mbs,
                                           extras=extras)

            ragged_to_dense_with_splits = ragged_conversion_ops.ragged_to_dense(
                splits_rt_placeholder, default_value=default_value)
            self.run_op_benchmark(op_or_tensor=ragged_to_dense_with_splits.op,
                                  name='ragged_to_dense_with_splits',
                                  feed_dict=splits_feed_dict,
                                  **run_op_benchmark_kwargs)

            ragged_to_tensor_with_splits = splits_rt_placeholder.to_tensor(
                default_value=default_value)
            self.run_op_benchmark(op_or_tensor=ragged_to_tensor_with_splits.op,
                                  name='ragged_to_tensor_with_splits',
                                  feed_dict=splits_feed_dict,
                                  **run_op_benchmark_kwargs)

            ragged_to_dense_with_rowids = ragged_conversion_ops.ragged_to_dense(
                rowids_rt_placeholder, default_value=default_value)
            self.run_op_benchmark(op_or_tensor=ragged_to_dense_with_rowids.op,
                                  name='ragged_to_dense_with_rowids',
                                  feed_dict=rowids_feed_dict,
                                  **run_op_benchmark_kwargs)

            ragged_to_tensor_with_rowids = rowids_rt_placeholder.to_tensor(
                default_value=default_value)
            self.run_op_benchmark(op_or_tensor=ragged_to_tensor_with_rowids.op,
                                  name='ragged_to_tensor_with_rowids',
                                  feed_dict=rowids_feed_dict,
                                  **run_op_benchmark_kwargs)
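The docstring above spells out run_benchmark's knobs (shape, ragged_rank, dtype, fill, default_shape, output_shape, min_iters); concrete benchmark methods would then just call it with different combinations. The sketch below shows two such hypothetical drivers; the method names and the specific shape/fill/dtype/output_shape choices are illustrative assumptions, not taken from the source.

    def benchmark_2d_default(self):
        # Hypothetical driver: 100x100 bounding box, ragged_rank defaults to
        # len(shape) - 1 = 1, dimensions 80% full by default.
        self.run_benchmark(shape=(100, 100), min_iters=1000)

    def benchmark_3d_int64_sparse_fill(self):
        # Hypothetical driver: three dimensions, ragged dimensions only 30%
        # full, cropped or padded to a fixed output shape.
        self.run_benchmark(
            shape=(64, 64, 64),
            fill=[1.0, 0.3, 0.3],
            dtype=dtypes.int64,
            output_shape=(64, 32, 32),
            min_iters=200)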
 def benchmark_times_an_op(self):
   with session.Session(config=benchmark.benchmark_config()) as sess:
     a = constant_op.constant(0.0)
     a_plus_a = a + a
     return self.run_op_benchmark(
         sess, a_plus_a, min_iters=1000, store_trace=True, name="op_benchmark")