Example 1
  def benchmarkMatrixSolveLsOp(self):
    run_gpu_test = test_lib.is_gpu_available(True)
    regularizer = 1.0
    for matrix_shape in self.matrix_shapes:
      for num_rhs in 1, 2, matrix_shape[-1]:

        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/cpu:0"):
          matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
          x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
          self.evaluate(variables.global_variables_initializer())
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(x),
              min_iters=25,
              store_memory_usage=False,
              name=("matrix_solve_ls_cpu_shape_{matrix_shape}_num_rhs_{num_rhs}"
                   ).format(matrix_shape=matrix_shape, num_rhs=num_rhs))

        if run_gpu_test and (len(matrix_shape) < 3 or matrix_shape[0] < 513):
          with ops.Graph().as_default(), \
                session.Session(config=benchmark.benchmark_config()) as sess, \
                ops.device("/gpu:0"):
            matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
            x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
            self.evaluate(variables.global_variables_initializer())
            self.run_op_benchmark(
                sess,
                control_flow_ops.group(x),
                min_iters=25,
                store_memory_usage=False,
                name=("matrix_solve_ls_gpu_shape_{matrix_shape}_num_rhs_"
                      "{num_rhs}").format(
                          matrix_shape=matrix_shape, num_rhs=num_rhs))
 def testWrongDimensions(self):
   # The matrix and right-hand sides should have the same number of rows.
   with self.session():
     matrix = constant_op.constant([[1., 0.], [0., 1.]])
     rhs = constant_op.constant([[1., 0.]])
     with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)):
       linalg_ops.matrix_solve_ls(matrix, rhs)
 def testWrongDimensions(self):
   # The matrix and right-hand sides should have the same number of rows.
   with self.session(use_gpu=True):
     matrix = constant_op.constant([[1., 0.], [0., 1.]])
     rhs = constant_op.constant([[1., 0.]])
     with self.assertRaises(ValueError):
       linalg_ops.matrix_solve_ls(matrix, rhs)
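
A minimal well-formed call, for contrast with the failing cases above (illustrative values; tf.linalg.lstsq is the public alias of linalg_ops.matrix_solve_ls): the matrix and right-hand side must agree in their number of rows.

import tensorflow as tf

# 3x2 matrix and 3x1 right-hand side: both operands have 3 rows, so the call
# is valid and returns the 2x1 least-squares solution.
matrix = tf.constant([[1., 0.], [0., 1.], [1., 1.]])
rhs = tf.constant([[1.], [2.], [3.]])
solution = tf.linalg.lstsq(matrix, rhs)
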
  def benchmarkMatrixSolveLsOp(self):
    run_gpu_test = test_lib.is_gpu_available(True)
    regularizer = 1.0
    for matrix_shape in self.matrix_shapes:
      for num_rhs in 1, 2, matrix_shape[-1]:

        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/cpu:0"):
          matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
          x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(x),
              min_iters=25,
              store_memory_usage=False,
              name=("matrix_solve_ls_cpu_shape_{matrix_shape}_num_rhs_{num_rhs}"
                   ).format(matrix_shape=matrix_shape, num_rhs=num_rhs))

        if run_gpu_test and (len(matrix_shape) < 3 or matrix_shape[0] < 513):
          with ops.Graph().as_default(), \
                session.Session(config=benchmark.benchmark_config()) as sess, \
                ops.device("/gpu:0"):
            matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
            x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
            variables.global_variables_initializer().run()
            self.run_op_benchmark(
                sess,
                control_flow_ops.group(x),
                min_iters=25,
                store_memory_usage=False,
                name=("matrix_solve_ls_gpu_shape_{matrix_shape}_num_rhs_"
                      "{num_rhs}").format(
                          matrix_shape=matrix_shape, num_rhs=num_rhs))
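
Both benchmark variants above rely on a _GenerateTestData helper that is not included in this excerpt. A minimal sketch of what such a helper could look like, assuming it only needs to produce a matrix variable and a right-hand side of compatible shapes (the real helper may differ):

import numpy as np
from tensorflow.python.ops import variables

def _GenerateTestData(matrix_shape, num_rhs):
  # Deterministic matrix and right-hand side with matching row counts, wrapped
  # in variables so the benchmark graph has something to initialize.
  batch_shape = matrix_shape[:-2]
  num_rows = matrix_shape[-2]
  matrix = np.ones(matrix_shape).astype(np.float32)
  rhs = np.ones(batch_shape + (num_rows, num_rhs)).astype(np.float32)
  matrix = variables.Variable(matrix, trainable=False)
  rhs = variables.Variable(rhs, trainable=False)
  return matrix, rhs
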
Example 6
  def _verifySolve(self,
                   x,
                   y,
                   dtype,
                   use_placeholder,
                   fast,
                   l2_regularizer,
                   batch_shape=()):
    if not fast and l2_regularizer != 0:
      # The slow path does not support regularization.
      return
    maxdim = np.max(x.shape)
    if dtype == np.float32 or dtype == np.complex64:
      tol = maxdim * 5e-4
    else:
      tol = maxdim * 5e-7
    a = x.astype(dtype)
    b = y.astype(dtype)
    if dtype in [np.complex64, np.complex128]:
      a.imag = a.real
      b.imag = b.real
    # numpy.linalg.lstsq does not support batching, so we just solve a single
    # system and replicate the solution and residual norm.
    np_ans = _SolveWithNumpy(x, y, l2_regularizer=l2_regularizer)
    np_r = np.dot(np.conj(a.T), b - np.dot(a, np_ans))
    np_r_norm = np.sqrt(np.sum(np.conj(np_r) * np_r))
    if batch_shape != ():
      a = np.tile(a, batch_shape + (1, 1))
      b = np.tile(b, batch_shape + (1, 1))
      np_ans = np.tile(np_ans, batch_shape + (1, 1))
      np_r_norm = np.tile(np_r_norm, batch_shape)
    with self.cached_session(use_gpu=fast) as sess:
      if use_placeholder:
        a_ph = array_ops.placeholder(dtypes.as_dtype(dtype))
        b_ph = array_ops.placeholder(dtypes.as_dtype(dtype))
        feed_dict = {a_ph: a, b_ph: b}
        tf_ans = linalg_ops.matrix_solve_ls(
            a_ph, b_ph, fast=fast, l2_regularizer=l2_regularizer)
      else:
        tf_ans = linalg_ops.matrix_solve_ls(
            a, b, fast=fast, l2_regularizer=l2_regularizer)
        feed_dict = {}
        self.assertEqual(np_ans.shape, tf_ans.get_shape())
      if l2_regularizer == 0:
        # The least squares solution should satisfy A^H * (b - A*x) = 0.
        tf_r = b - math_ops.matmul(a, tf_ans)
        tf_r = math_ops.matmul(a, tf_r, adjoint_a=True)
        tf_r_norm = linalg_ops.norm(tf_r, ord="fro", axis=[-2, -1])
        tf_ans_val, tf_r_norm_val = sess.run(
            [tf_ans, tf_r_norm], feed_dict=feed_dict)
        self.assertAllClose(np_r_norm, tf_r_norm_val, atol=tol, rtol=tol)
      else:
        tf_ans_val = sess.run(tf_ans, feed_dict=feed_dict)

    self.assertEqual(np_ans.shape, tf_ans_val.shape)
    self.assertAllClose(np_ans, tf_ans_val, atol=2 * tol, rtol=2 * tol)
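
The reference answer above comes from a _SolveWithNumpy helper that is not defined in this excerpt. A sketch of a compatible implementation, assuming it falls back to numpy's least-squares solver when the regularizer is zero and otherwise solves the regularized normal equations:

import numpy as np

def _SolveWithNumpy(matrix, rhs, l2_regularizer=0):
  if l2_regularizer == 0:
    # Unregularized case: plain least squares.
    np_ans, _, _, _ = np.linalg.lstsq(matrix, rhs, rcond=-1)
    return np_ans
  rows, cols = matrix.shape
  if rows >= cols:
    # Overdetermined: solve (A^H A + lambda I) x = A^H b.
    gramian = np.dot(np.conj(matrix.T), matrix) + l2_regularizer * np.identity(cols)
    return np.linalg.solve(gramian, np.dot(np.conj(matrix.T), rhs))
  else:
    # Underdetermined: x = A^H (A A^H + lambda I)^{-1} b, the minimum-norm
    # solution of the regularized problem.
    gramian = np.dot(matrix, np.conj(matrix.T)) + l2_regularizer * np.identity(rows)
    return np.dot(np.conj(matrix.T), np.linalg.solve(gramian, rhs))
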
 def testEmpty(self):
   full = np.array([[1., 2.], [3., 4.], [5., 6.]])
   empty0 = np.empty([3, 0])
   empty1 = np.empty([0, 2])
   for fast in [True, False]:
     with self.cached_session(use_gpu=True):
       tf_ans = linalg_ops.matrix_solve_ls(empty0, empty0, fast=fast).eval()
       self.assertEqual(tf_ans.shape, (0, 0))
       tf_ans = linalg_ops.matrix_solve_ls(empty0, full, fast=fast).eval()
       self.assertEqual(tf_ans.shape, (0, 2))
       tf_ans = linalg_ops.matrix_solve_ls(full, empty0, fast=fast).eval()
       self.assertEqual(tf_ans.shape, (2, 0))
       tf_ans = linalg_ops.matrix_solve_ls(empty1, empty1, fast=fast).eval()
       self.assertEqual(tf_ans.shape, (2, 2))
Example 9
 def testEmpty(self):
   full = np.array([[1., 2.], [3., 4.], [5., 6.]])
   empty0 = np.empty([3, 0])
   empty1 = np.empty([0, 2])
   for fast in [True, False]:
     with self.test_session(use_gpu=True):
       tf_ans = linalg_ops.matrix_solve_ls(empty0, empty0, fast=fast).eval()
       self.assertEqual(tf_ans.shape, (0, 0))
       tf_ans = linalg_ops.matrix_solve_ls(empty0, full, fast=fast).eval()
       self.assertEqual(tf_ans.shape, (0, 2))
       tf_ans = linalg_ops.matrix_solve_ls(full, empty0, fast=fast).eval()
       self.assertEqual(tf_ans.shape, (2, 0))
       tf_ans = linalg_ops.matrix_solve_ls(empty1, empty1, fast=fast).eval()
       self.assertEqual(tf_ans.shape, (2, 2))
 def _verifySolveBatch(self, x, y):
   # Since numpy.linalg.lstsq does not support batch solves, as opposed
   # to numpy.linalg.solve, we just perform this test for a fixed batch size
   # of 2x3.
   for np_type in [np.float32, np.float64]:
     a = np.tile(x.astype(np_type), [2, 3, 1, 1])
     b = np.tile(y.astype(np_type), [2, 3, 1, 1])
     np_ans = np.empty([2, 3, a.shape[-1], b.shape[-1]])
     for dim1 in range(2):
       for dim2 in range(3):
         np_ans[dim1, dim2, :, :], _, _, _ = np.linalg.lstsq(
             a[dim1, dim2, :, :], b[dim1, dim2, :, :])
     for fast in [True, False]:
       with self.test_session():
         tf_ans = linalg_ops.matrix_solve_ls(a, b, fast=fast).eval()
       self.assertEqual(np_ans.shape, tf_ans.shape)
       # Check residual norm.
       tf_r = b - BatchMatMul(a, tf_ans)
       tf_r_norm = np.sum(tf_r * tf_r)
       np_r = b - BatchMatMul(a, np_ans)
       np_r_norm = np.sum(np_r * np_r)
       self.assertAllClose(np_r_norm, tf_r_norm)
       # Check solution.
       if fast or a.shape[-2] >= a.shape[-1]:
         # We skip this test for the underdetermined case when using the
         # slow path, because Eigen does not return a minimum norm solution.
         # TODO(rmlarsen): Enable this check for all paths if/when we fix
         # Eigen's solver.
         self.assertAllClose(np_ans, tf_ans, atol=1e-5, rtol=1e-5)
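
The residual check above uses a BatchMatMul helper that is not shown. Assuming it is simply a matrix product over the two trailing dimensions with broadcasting over any leading batch dimensions, a one-line numpy version suffices:

import numpy as np

def BatchMatMul(a, b):
  # Matrix multiply the last two axes; leading batch dimensions broadcast.
  return np.matmul(a, b)
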
Example 13
  def _verifySolve(self, x, y):
    for np_type in [np.float32, np.float64, np.complex64, np.complex128]:
      a = x.astype(np_type)
      b = y.astype(np_type)
      if np_type in [np.complex64, np.complex128]:
        a.imag = a.real
        b.imag = b.real
      np_ans, _, _, _ = np.linalg.lstsq(a, b)
      for fast in [True, False]:
        with self.test_session():
          tf_ans = linalg_ops.matrix_solve_ls(a, b, fast=fast)
          ans = tf_ans.eval()
        self.assertEqual(np_ans.shape, tf_ans.get_shape())
        self.assertEqual(np_ans.shape, ans.shape)

        # Check residual norm.
        tf_r = b - BatchMatMul(a, ans)
        tf_r_norm = np.sum(tf_r * tf_r)
        np_r = b - BatchMatMul(a, np_ans)
        np_r_norm = np.sum(np_r * np_r)
        self.assertAllClose(np_r_norm, tf_r_norm)

        # Check solution.
        if np_type == np.float32 or np_type == np.complex64:
          tol = 5e-5
        else:
          tol = 1e-12
        self.assertAllClose(np_ans, ans, atol=tol, rtol=tol)
Example 14
 def testEmpty(self):
     full = np.array([[1., 2.], [3., 4.], [5., 6.]])
     empty0 = np.empty([3, 0])
     empty1 = np.empty([0, 2])
     for fast in [True, False]:
         tf_ans = self.evaluate(
             linalg_ops.matrix_solve_ls(empty0, empty0, fast=fast))
         self.assertEqual(tf_ans.shape, (0, 0))
         tf_ans = self.evaluate(
             linalg_ops.matrix_solve_ls(empty0, full, fast=fast))
         self.assertEqual(tf_ans.shape, (0, 2))
         tf_ans = self.evaluate(
             linalg_ops.matrix_solve_ls(full, empty0, fast=fast))
         self.assertEqual(tf_ans.shape, (2, 0))
         tf_ans = self.evaluate(
             linalg_ops.matrix_solve_ls(empty1, empty1, fast=fast))
         self.assertEqual(tf_ans.shape, (2, 2))
Example 15
 def testBatchResultSize(self):
   # 3x3x3 matrices, 3x3x1 right-hand sides.
   matrix = np.array([1., 2., 3., 4., 5., 6., 7., 8., 9.] * 3).reshape(3, 3, 3)
   rhs = np.array([1., 2., 3.] * 3).reshape(3, 3, 1)
   answer = linalg_ops.matrix_solve(matrix, rhs)
   ls_answer = linalg_ops.matrix_solve_ls(matrix, rhs)
   self.assertEqual(ls_answer.get_shape(), [3, 3, 1])
   self.assertEqual(answer.get_shape(), [3, 3, 1])
 def testBatchResultSize(self):
   # 3x3x3 matrices, 3x3x1 right-hand sides.
   matrix = np.array([1., 0., 0., 0., 1., 0., 0., 0., 1.] * 3).reshape(3, 3, 3)  # pylint: disable=too-many-function-args
   rhs = np.array([1., 2., 3.] * 3).reshape(3, 3, 1)  # pylint: disable=too-many-function-args
   answer = linalg_ops.matrix_solve(matrix, rhs)
   ls_answer = linalg_ops.matrix_solve_ls(matrix, rhs)
   self.assertEqual(ls_answer.get_shape(), [3, 3, 1])
   self.assertEqual(answer.get_shape(), [3, 3, 1])
Example 18
    def _full(op, grad):
        a = op.inputs[0]
        output = op.outputs[0]

        a_H = math_ops.conj(array_ops.matrix_transpose(a))

        grad_b = linalg_ops.matrix_solve_ls(a_H, grad, fast=False)
        grad_a = -math_ops.matmul(grad_b, output, adjoint_b=True)
        return grad_a, grad_b, None
  def _verifyRegularized(self, x, y, l2_regularizer):
    for np_type in [np.float32, np.float64]:
      # Test with a single matrix.
      a = x.astype(np_type)
      b = y.astype(np_type)
      np_ans = BatchRegularizedLeastSquares(a, b, l2_regularizer)
      with self.test_session():
        # Test matrix_solve_ls on regular matrices
        tf_ans = linalg_ops.matrix_solve_ls(
            a, b, l2_regularizer=l2_regularizer, fast=True).eval()
        self.assertAllClose(np_ans, tf_ans, atol=1e-5, rtol=1e-5)

      # Test with a 2x3 batch of matrices.
      a = np.tile(x.astype(np_type), [2, 3, 1, 1])
      b = np.tile(y.astype(np_type), [2, 3, 1, 1])
      np_ans = BatchRegularizedLeastSquares(a, b, l2_regularizer)
      with self.test_session():
        tf_ans = linalg_ops.matrix_solve_ls(
            a, b, l2_regularizer=l2_regularizer, fast=True).eval()
      self.assertAllClose(np_ans, tf_ans, atol=1e-5, rtol=1e-5)
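
BatchRegularizedLeastSquares is another reference helper that this excerpt does not define. A plausible numpy sketch, assuming it solves the regularized normal equations (A^H A + lambda I) x = A^H b independently for a single matrix or for every matrix in a batch:

import numpy as np

def BatchRegularizedLeastSquares(matrices, rhss, l2_regularization=0.0):
  a = np.asarray(matrices)
  b = np.asarray(rhss)
  a_t = np.conj(np.swapaxes(a, -2, -1))
  # (A^H A + lambda I) x = A^H b, solved per batch entry via broadcasting.
  gramian = np.matmul(a_t, a) + l2_regularization * np.identity(a.shape[-1])
  return np.linalg.solve(gramian, np.matmul(a_t, b))
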
Example 21
          # Alternative shape that consistently produces a valid numerical Jacobian
          shape = extra + (size + 1, size + 1)
          name = '%s_%s' % (dtype.__name__, '_'.join(map(str, shape)))
        _AddTest(
            MatrixUnaryFunctorGradientTest, 'MatrixSquareRootGradient', name,
            _GetMatrixUnaryFunctorGradientTest(linalg_ops.matrix_square_root,
                                               dtype, shape))

  # Tests for gradients of matrix_solve_ls
  for dtype in np.float32, np.float64:
    for rows in 2, 5, 10:
      for cols in 2, 5, 10:
        for l2_regularization in 1e-6, 0.001, 1.0:
          shape = (rows, cols)
          name = '%s_%s_%s' % (dtype.__name__, '_'.join(map(str, shape)),
                               l2_regularization)
          float32_tol_fudge = 5.1 if l2_regularization == 1e-6 else 4.0
          _AddTest(
              MatrixBinaryFunctorGradientTest,
              'MatrixSolveLsGradient',
              name,
              # pylint: disable=long-lambda,g-long-lambda
              _GetMatrixBinaryFunctorGradientTest(
                  (lambda a, b, l=l2_regularization:
                   linalg_ops.matrix_solve_ls(a, b, l)),
                  dtype,
                  shape,
                  float32_tol_fudge))

  test_lib.main()
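
The registration loop above depends on _GetMatrixBinaryFunctorGradientTest, which is not part of this excerpt. A rough sketch of the kind of test factory it could be, assuming it simply compares theoretical and numerical Jacobians of the functor (the real factory's input generation and tolerances may differ):

import numpy as np
import tensorflow as tf

def _GetMatrixBinaryFunctorGradientTest(functor_, dtype_, shape_,
                                        float32_tol_fudge=1.0):
  def Test(self):
    np.random.seed(1)
    a = np.random.uniform(low=-1.0, high=1.0, size=shape_).astype(dtype_)
    b = np.random.uniform(low=-1.0, high=1.0, size=shape_).astype(dtype_)
    tol = float32_tol_fudge * (1e-3 if dtype_ == np.float32 else 1e-6)
    # Compare backprop (theoretical) and finite-difference (numerical) Jacobians.
    theoretical, numerical = tf.test.compute_gradient(functor_, [a, b])
    self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)
  return Test
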
Example 22
        # The numerical Jacobian is consistently invalid for these four shapes
        # because the matrix square root of the perturbed input doesn't exist
        if shape in {(2, 5, 5), (3, 5, 5), (3, 10, 10), (3, 2, 5, 5)}:
          # Alternative shape that consistently produces a valid numerical Jacobian
          shape = extra + (size + 1, size + 1)
          name = '%s_%s' % (dtype.__name__, '_'.join(map(str, shape)))
        _AddTest(
            MatrixUnaryFunctorGradientTest, 'MatrixSquareRootGradient', name,
            _GetMatrixUnaryFunctorGradientTest(linalg_ops.matrix_square_root,
                                               dtype, shape))

  # Tests for gradients of matrix_solve_ls
  for dtype in np.float32, np.float64:
    for rows in 2, 5, 10:
      for cols in 2, 5, 10:
        for l2_regularization in 1e-6, 0.001, 1.0:
          shape = (rows, cols)
          name = '%s_%s_%s' % (dtype.__name__, '_'.join(map(
              str, shape)), l2_regularization)
          float32_tol_fudge = 5.1 if l2_regularization == 1e-6 else 4.0
          _AddTest(
              MatrixBinaryFunctorGradientTest,
              'MatrixSolveLsGradient',
              name,
              # pylint: disable=long-lambda,g-long-lambda
              _GetMatrixBinaryFunctorGradientTest(
                  (lambda a, b, l=l2_regularization: linalg_ops.matrix_solve_ls(
                      a, b, l)), dtype, shape, float32_tol_fudge))

  test_lib.main()
      for extra in [(), (2,), (3,)] + [(3, 2)] * (size < 10):
        shape = extra + (size, size)
        name = '%s_%s' % (dtype.__name__, '_'.join(map(str, shape)))
        _AddTest(MatrixUnaryFunctorGradientTest, 'MatrixInverseGradient', name,
                 _GetMatrixUnaryFunctorGradientTest(linalg_ops.matrix_inverse,
                                                    dtype, shape))
        _AddTest(
            MatrixUnaryFunctorGradientTest, 'MatrixDeterminantGradient', name,
            _GetMatrixUnaryFunctorGradientTest(linalg_ops.matrix_determinant,
                                               dtype, shape))

  # Tests for gradients of matrix_solve_ls
  for dtype in np.float32, np.float64:
    for rows in 2, 5, 10:
      for cols in 2, 5, 10:
        for l2_regularization in 0.0, 0.001, 1.0:
          shape = (rows, cols)
          name = '%s_%s_%s' % (dtype.__name__, '_'.join(map(str, shape)),
                               l2_regularization)
          _AddTest(
              MatrixBinaryFunctorGradientTest,
              'MatrixSolveLsGradient',
              name,
              _GetMatrixBinaryFunctorGradientTest(
                  lambda a, b, l=l2_regularization: linalg_ops.matrix_solve_ls(a, b, l),
                  dtype,
                  shape,
                  float32_tol_fudge=4.0))

  test_lib.main()
Example 24
            _GetMatrixUnaryFunctorGradientTest(linalg_ops.matrix_determinant,
                                               dtype, shape))
        _AddTest(
            MatrixUnaryFunctorGradientTest, 'LogMatrixDeterminantGradient',
            name,
            _GetMatrixUnaryFunctorGradientTest(
                lambda x: linalg_ops.log_matrix_determinant(x)[1],
                dtype, shape))

  # Tests for gradients of matrix_solve_ls
  for dtype in np.float32, np.float64:
    for rows in 2, 5, 10:
      for cols in 2, 5, 10:
        for l2_regularization in 1e-6, 0.001, 1.0:
          shape = (rows, cols)
          name = '%s_%s_%s' % (dtype.__name__, '_'.join(map(str, shape)),
                               l2_regularization)
          _AddTest(
              MatrixBinaryFunctorGradientTest,
              'MatrixSolveLsGradient',
              name,
              # pylint: disable=long-lambda,g-long-lambda
              _GetMatrixBinaryFunctorGradientTest(
                  (lambda a, b, l=l2_regularization:
                   linalg_ops.matrix_solve_ls(a, b, l)),
                  dtype,
                  shape,
                  float32_tol_fudge=4.0))

  test_lib.main()
    def apply_gradients(
            self,
            grads_and_vars,
            worker_id,
            global_step=None,
            name=None,
            collect_cdfs=False,
            #  batch_idx_list=None, worker_kill_list=None, num_workers=None, num_batches_per_epoch=None):
            matrix_to_solve=None,
            num_batches_per_epoch=None):
        """Apply gradients to variables.
    This contains most of the synchronization implementation and also wraps the
    apply_gradients() from the real optimizer.
    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        compute_gradients().
      worker_id: Index of the worker applying these gradients; used to select
        this worker's sync token queue and its entry in the least-squares
        solution.
      global_step: Optional Variable to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the Optimizer constructor.
      collect_cdfs: If True, aggregate gradients from replicas_to_aggregate
        replicas; otherwise take the first accumulated gradient.
      matrix_to_solve: Matrix A of the least-squares system A * w = 1 whose
        solution provides the per-worker gradient weights.
      num_batches_per_epoch: Number of batches per epoch; determines the
        length of the all-ones right-hand side of the least-squares system.
    Returns:
      train_op: The op to dequeue a token so the replicas can exit this batch
      and start the next one. This is executed by each replica.
    Raises:
      ValueError: If the grads_and_vars is empty.
      ValueError: If global step is not provided, the staleness cannot be
        checked.
    """
        if not grads_and_vars:
            raise ValueError("Must supply at least one variable")

        if global_step is None:
            raise ValueError("Global step is required to check staleness")

        self._global_step = global_step
        train_ops = []
        aggregated_grad = []
        var_list = []

        self._local_step = variables.Variable(
            initial_value=0,
            trainable=False,
            collections=[ops.GraphKeys.LOCAL_VARIABLES],
            dtype=global_step.dtype.base_dtype,
            name="sync_rep_local_step")
        self.local_step_init_op = state_ops.assign(self._local_step,
                                                   global_step._ref())
        chief_init_ops = [self.local_step_init_op]
        self.ready_for_local_init_op = variables.report_uninitialized_variables(
            variables.all_variables())

        # The wait op waits for the current worker to dequeue a token from its respective token queue
        self._wait_op = self._sync_token_queues[worker_id].dequeue()

        # Replicas have to wait until they can get a token from the token queue
        # BEFORE beginning to compute gradients.
        with ops.device(global_step.device):
            queue_size = self._sync_token_queues[worker_id].size()
            update_local_step_op = state_ops.assign(self._local_step,
                                                    global_step._ref())

        # Gradient accum creation
        with ops.name_scope(None, self._name):
            for grad, var in grads_and_vars:
                var_list.append(var)
                tf.logging.info("Grad " + str(grad) + " assigned to " +
                                str(var.device))
                with ops.device(var.device):
                    if grad is None:
                        continue
                    elif isinstance(grad, ops.Tensor):
                        grad_accum = data_flow_ops.ConditionalAccumulator(
                            grad.dtype,
                            shape=var.get_shape(),
                            shared_name=var.name + "/grad_accum")
                    else:
                        if not isinstance(grad, ops.IndexedSlices):
                            raise ValueError("Unknown grad type!")
                        grad_accum = data_flow_ops.SparseConditionalAccumulator(
                            grad.dtype,
                            shape=(),
                            shared_name=var.name + "/grad_accum")

                    self._accumulator_list.append((grad_accum, var))
            """# Phase 1 gradient computation
      with ops.control_dependencies([update_local_step_op]):
        for index, (grad, var) in enumerate(grads_and_vars):
          with ops.device(var.device):
            if grad is None:
              continue

            elif isinstance(grad, ops.Tensor):
              grad_accum = self._accumulator_list[index][0]

              train_ops.append(grad_accum.apply_grad(grad,
                                                     local_step=self._local_step._ref()))

            else:
              if not isinstance(grad, ops.IndexedSlices):
                raise ValueError("Unknown grad type!")
              grad_accum = self._accumulator_list[index][0]

              train_ops.append(grad_accum.apply_indexed_slices_grad(
                grad, local_step=self._local_step._ref()))"""

            # Phase 1 gradient computation
            with ops.control_dependencies([update_local_step_op]):
                for index, (grad, var) in enumerate(grads_and_vars):
                    print_start_op = logging_ops.Print(
                        global_step, [global_step],
                        message="Starting to apply grads for variable %d" %
                        index)
                    train_ops.append(print_start_op)
                    with ops.device(var.device):
                        work_idx_print = logging_ops.Print(
                            worker_id, [worker_id],
                            message="worker id for comp grad")
                        ps_step_printer0 = logging_ops.Print(
                            global_step, [global_step],
                            message="global step printer0 on ps")
                        train_ops.append(work_idx_print)
                        train_ops.append(ps_step_printer0)
                        '''Implement LS computation and solution here'''
                        #b = np.ones(int(num_batches_per_epoch))
                        b = tf.ones([int(num_batches_per_epoch), 1],
                                    tf.float32)
                        A = matrix_to_solve
                        #            A_for_calc = np.transpose(A)
                        LS_solution = linalg_ops.matrix_solve_ls(A,
                                                                 b,
                                                                 fast=False)
                        LS_calc = tf.reshape(LS_solution, [-1])
                        weight = tf.slice(LS_calc, [worker_id], [1])
                        #            print_ls_op = logging_ops.Print(LS_calc, [LS_calc], message="Solution for LS!")
                        #            train_ops.append(print_ls_op)
                        weighted_grad = tf.scalar_mul(weight[0], grad)
                        '''Kill some workers'''
                        if grad is None:
                            continue

                        elif isinstance(grad, ops.Tensor):
                            grad_accum = self._accumulator_list[index][0]

                            with ops.control_dependencies([print_start_op]):
                                with tf.device("job:worker/task:%d" %
                                               worker_id):
                                    #                  apply_grad_op = grad_accum.apply_grad(grad,
                                    apply_grad_op = grad_accum.apply_grad(
                                        weighted_grad,
                                        local_step=self._local_step._ref())
                                    with ops.control_dependencies(
                                        [apply_grad_op]):
                                        finished_print_op = logging_ops.Print(
                                            global_step, [global_step],
                                            message=
                                            "Done applying grads for variable %d"
                                            % index)
                                        train_ops.append(finished_print_op)

                        else:
                            if not isinstance(grad, ops.IndexedSlices):
                                raise ValueError("Unknown grad type!")
                            grad_accum = self._accumulator_list[index][0]

                            with ops.control_dependencies([print_start_op]):
                                with tf.device("job:worker/task:%d" %
                                               worker_id):
                                    apply_grad_op = grad_accum.apply_indexed_slices_grad(
                                        #                    grad, local_step=self._local_step._ref())
                                        weighted_grad,
                                        local_step=self._local_step._ref())
                                    with ops.control_dependencies(
                                        [apply_grad_op]):
                                        finished_print_op = logging_ops.Print(
                                            global_step, [global_step],
                                            message=
                                            "Done applying grads for variable %d"
                                            % index)
                                        train_ops.append(finished_print_op)

            # Phase 2 gradient applying
            for index, (grad, var) in enumerate(grads_and_vars):
                with ops.device(var.device):
                    grad_accum = self._accumulator_list[index][0]
                    work_idx_print1 = logging_ops.Print(
                        worker_id, [worker_id],
                        message="worker id for aggregate grad")
                    ps_step_printer1 = logging_ops.Print(
                        global_step, [global_step],
                        message="global step printer1 on ps")
                    num_replica_aggragate = logging_ops.Print(
                        self._replicas_to_aggregate,
                        [self._replicas_to_aggregate],
                        message="num replica aggregate")
                    train_ops.append(work_idx_print1)
                    train_ops.append(ps_step_printer1)
                    train_ops.append(num_replica_aggragate)
                    if grad is None:
                        aggregated_grad.append(None)
                    elif isinstance(grad, ops.Tensor):
                        if collect_cdfs:
                            #              aggregated_grad.append(grad_accum.take_grad(self._total_num_replicas))
                            aggregated_grad.append(
                                grad_accum.take_grad(
                                    self._replicas_to_aggregate))
                        else:
                            aggregated_grad.append(grad_accum.take_grad(1))
                    else:
                        if collect_cdfs:
                            #              aggregated_grad.append(grad_accum.take_grad(self._total_num_replicas))
                            aggregated_grad.append(
                                grad_accum.take_grad(
                                    self._replicas_to_aggregate))
                        else:
                            aggregated_grad.append(
                                grad_accum.take_indexed_slices_grad(1))

            aggregated_grads_and_vars = zip(aggregated_grad, var_list)

            # Some debug operations
            self.print_sizes = logging_ops.Print(global_step, [
                self._sync_token_queues[i].size()
                for i in range(self._total_num_replicas)
            ],
                                                 message="queue sizes")
            self.print_accum_sizes = logging_ops.Print(
                self._local_step,
                [x[0].num_accumulated()
                 for x in self._accumulator_list] + [worker_id],
                message="Accum sizes")
            self.print_local_step = logging_ops.Print(
                self._local_step,
                [self._local_step._ref(),
                 global_step._ref()],
                message="local vs global step")

            # sync_op will be assigned to the same device as the global step.
            with ops.device(global_step.device), ops.name_scope(""):
                with ops.control_dependencies([self.print_accum_sizes]):
                    update_op = self._opt.apply_gradients(
                        aggregated_grads_and_vars, global_step)
                    self._update_op = update_op
                    with ops.control_dependencies([update_op]):
                        sync_op = []
                        for cur_worker_id in range(self._total_num_replicas):
                            sync_op.append(
                                self._sync_token_queues[cur_worker_id].enqueue(
                                    global_step))
                        sync_op = control_flow_ops.group(*(sync_op))

                # dummy_queue is passed to the queue runner. Don't use the real queues
                # because the queue runner doesn't automatically reopen it once it
                # closed queues in PS devices.
                dummy_queue = (data_flow_ops.FIFOQueue(
                    1,
                    types_pb2.DT_INT32,
                    shapes=(),
                    shared_name="dummy_queue"))

                self._chief_queue_runner = queue_runner.QueueRunner(
                    dummy_queue, [sync_op])

            with ops.device(global_step.device), ops.name_scope(""):
                with ops.control_dependencies(train_ops):
                    # Worker finished applying gradients. Add token to phase1_finished_queue
                    train_op = logging_ops.Print(
                        self._local_step._ref(), [
                            x[0].num_accumulated()
                            for x in self._accumulator_list
                        ] + [worker_id],
                        message="Finished worker updates",
                        name="FinishedWorkerUpdatesPrint")

            for accum, var in self._accumulator_list:
                with ops.device(var.device):
                    chief_init_ops.append(
                        accum.set_global_step(global_step,
                                              name="SetGlobalStep"))
            self.chief_init_op = control_flow_ops.group(*(chief_init_ops))
            self._gradients_applied = True

            return train_op
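
The least-squares weighting buried inside apply_gradients above can be exercised on its own. A standalone sketch with illustrative values (A stands in for matrix_to_solve, and tf.linalg.lstsq is the public alias of linalg_ops.matrix_solve_ls):

import tensorflow as tf

num_batches_per_epoch = 4
worker_id = 1

# Illustrative system: one column per worker, one row per batch.
A = tf.constant([[1., 0., 0.],
                 [0., 1., 0.],
                 [0., 0., 1.],
                 [1., 1., 1.]])
b = tf.ones([num_batches_per_epoch, 1], tf.float32)

# Solve A * w = 1 in the least-squares sense and pick out this worker's
# weight, mirroring the LS_solution / weight computation in the optimizer.
ls_solution = tf.linalg.lstsq(A, b, fast=False)
weights = tf.reshape(ls_solution, [-1])
worker_weight = weights[worker_id]

# The worker's gradient is then scaled by this weight before it is pushed
# into its accumulator.
grad = tf.constant([0.5, -1.0, 2.0])
weighted_grad = worker_weight * grad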