def _test_matmul(self, with_batch):
   for use_placeholder in self._use_placeholder_options:
     for build_info in self._operator_build_infos:
       # If batch dimensions are omitted, but there are
       # no batch dimensions for the linear operator, then
       # skip the test case. This is already checked with
       # with_batch=True.
       if not with_batch and len(build_info.shape) <= 2:
         continue
       for dtype in self._dtypes_to_test:
         for adjoint in self._adjoint_options:
           for adjoint_arg in self._adjoint_arg_options:
             with self.test_session(graph=ops.Graph()) as sess:
               sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
               operator, mat, feed_dict = self._operator_and_mat_and_feed_dict(
                   build_info, dtype, use_placeholder=use_placeholder)
               x = self._make_x(
                   operator, adjoint=adjoint, with_batch=with_batch)
               # If adjoint_arg, compute A X^H^H = A X.
               if adjoint_arg:
                 op_matmul = operator.matmul(
                     linalg.adjoint(x),
                     adjoint=adjoint,
                     adjoint_arg=adjoint_arg)
               else:
                 op_matmul = operator.matmul(x, adjoint=adjoint)
               mat_matmul = linear_operator_util.matmul_with_broadcast(
                   mat, x, adjoint_a=adjoint)
               if not use_placeholder:
                 self.assertAllEqual(op_matmul.get_shape(),
                                     mat_matmul.get_shape())
               op_matmul_v, mat_matmul_v = sess.run(
                   [op_matmul, mat_matmul], feed_dict=feed_dict)
               self.assertAC(op_matmul_v, mat_matmul_v)
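
The adjoint_arg branch above leans on the identity (X^H)^H = X: feeding linalg.adjoint(x) with adjoint_arg=True should reproduce the plain product A X. A minimal NumPy sketch of that identity (NumPy stands in for the TF ops; this is an illustration, not part of the test harness):

import numpy as np

rng = np.random.RandomState(0)
a = rng.rand(3, 4) + 1j * rng.rand(3, 4)   # operator matrix A
x = rng.rand(4, 2) + 1j * rng.rand(4, 2)   # right-hand side X

adjoint = lambda m: np.conj(np.swapaxes(m, -1, -2))  # conjugate transpose

# matmul(A, adjoint(adjoint(X))) == matmul(A, X), since (X^H)^H = X.
assert np.allclose(a @ adjoint(adjoint(x)), a @ x)
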
 def _test_matmul(self, with_batch):
   for use_placeholder in self._use_placeholder_options:
     for build_info in self._operator_build_infos:
       # If batch dimensions are omitted, but there are
       # no batch dimensions for the linear operator, then
       # skip the test case. This is already checked with
       # with_batch=True.
       if not with_batch and len(build_info.shape) <= 2:
         continue
       for dtype in self._dtypes_to_test:
         for adjoint in self._adjoint_options:
           for adjoint_arg in self._adjoint_arg_options:
             with self.session(graph=ops.Graph()) as sess:
               sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
               operator, mat = self._operator_and_matrix(
                   build_info, dtype, use_placeholder=use_placeholder)
               x = self._make_x(
                   operator, adjoint=adjoint, with_batch=with_batch)
               # If adjoint_arg, compute A X^H^H = A X.
               if adjoint_arg:
                 op_matmul = operator.matmul(
                     linalg.adjoint(x),
                     adjoint=adjoint,
                     adjoint_arg=adjoint_arg)
               else:
                 op_matmul = operator.matmul(x, adjoint=adjoint)
               mat_matmul = linear_operator_util.matmul_with_broadcast(
                   mat, x, adjoint_a=adjoint)
               if not use_placeholder:
                 self.assertAllEqual(op_matmul.get_shape(),
                                     mat_matmul.get_shape())
               op_matmul_v, mat_matmul_v = sess.run(
                   [op_matmul, mat_matmul])
               self.assertAC(op_matmul_v, mat_matmul_v)
 def _test_matmul(self, with_batch):
   for use_placeholder in self._use_placeholder_options:
     for build_info in self._operator_build_infos:
       for dtype in self._dtypes_to_test:
         for adjoint in self._adjoint_options:
           for adjoint_arg in self._adjoint_arg_options:
             with self.test_session(graph=ops.Graph()) as sess:
               sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
               operator, mat, feed_dict = self._operator_and_mat_and_feed_dict(
                   build_info, dtype, use_placeholder=use_placeholder)
               x = self._make_x(
                   operator, adjoint=adjoint, with_batch=with_batch)
               # If adjoint_arg, compute A X^H^H = A X.
               if adjoint_arg:
                 op_matmul = operator.matmul(
                     linalg.adjoint(x),
                     adjoint=adjoint,
                     adjoint_arg=adjoint_arg)
               else:
                 op_matmul = operator.matmul(x, adjoint=adjoint)
               mat_matmul = linear_operator_util.matmul_with_broadcast(
                   mat, x, adjoint_a=adjoint)
               if not use_placeholder:
                 self.assertAllEqual(op_matmul.get_shape(),
                                     mat_matmul.get_shape())
               op_matmul_v, mat_matmul_v = sess.run(
                   [op_matmul, mat_matmul], feed_dict=feed_dict)
               self.assertAC(op_matmul_v, mat_matmul_v)
  def test_static_dims_broadcast_y_has_extra_dims_transpose_dynamic(self):
    # Since the second arg has extra dims, and the domain dim of the first arg
    # is larger than the number of linear equations, code will "flip" the extra
    # dims of the first arg to the far right, making extra linear equations
    # (then call the matrix function, then flip back).
    # We have verified that this optimization indeed happens.  How? We stepped
    # through with a debugger.
    x = rng.rand(1, 7, 5)
    y = rng.rand(2, 3, 1, 7)
    x_broadcast = x + np.zeros((2, 3, 1, 1))

    x_ph = array_ops.placeholder(dtypes.float64, [None, None, None])
    y_ph = array_ops.placeholder(dtypes.float64, [None, None, None, None])

    with self.cached_session():
      result = linear_operator_util.matmul_with_broadcast(
          x_ph, y_ph, transpose_a=True, transpose_b=True)
      self.assertAllEqual(4, result.shape.ndims)
      expected = math_ops.matmul(
          x_broadcast, y, transpose_a=True, transpose_b=True)
      self.assertAllClose(expected.eval(),
                          result.eval(feed_dict={
                              x_ph: x,
                              y_ph: y
                          }))
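
For reference, the broadcasting this test expects matches NumPy's matmul semantics: the batch shape (1,) of x broadcasts against the (2, 3, 1) batch shape of y. A short NumPy sketch with the same shapes (illustrative only, not part of the test suite):

import numpy as np

rng = np.random.RandomState(0)
x = rng.rand(1, 7, 5)       # batch shape (1,), 7x5 matrices
y = rng.rand(2, 3, 1, 7)    # batch shape (2, 3, 1), 1x7 matrices

t = lambda m: np.swapaxes(m, -1, -2)  # transpose the trailing matrix dims

# Implicit broadcasting: batch shapes (1,) and (2, 3) broadcast to (2, 3).
result = np.matmul(t(x), t(y))        # -> shape (2, 3, 5, 1)

# Explicitly broadcasting x first gives the same values.
x_broadcast = x + np.zeros((2, 3, 1, 1))
expected = np.matmul(t(x_broadcast), t(y))

assert result.shape == (2, 3, 5, 1)
assert np.allclose(result, expected)
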
Example #5

    def test_static_dims_broadcast_y_has_extra_dims_transpose_dynamic(self):
        # Since the second arg has extra dims, and the domain dim of the first arg
        # is larger than the number of linear equations, code will "flip" the extra
        # dims of the first arg to the far right, making extra linear equations
        # (then call the matrix function, then flip back).
        # We have verified that this optimization indeed happens.  How? We stepped
        # through with a debugger.
        x = rng.rand(1, 7, 5)
        y = rng.rand(2, 3, 1, 7)
        x_broadcast = x + np.zeros((2, 3, 1, 1))

        x_ph = array_ops.placeholder(dtypes.float64, [None, None, None])
        y_ph = array_ops.placeholder(dtypes.float64, [None, None, None, None])

        with self.cached_session():
            result = linear_operator_util.matmul_with_broadcast(
                x_ph, y_ph, transpose_a=True, transpose_b=True)
            self.assertAllEqual(4, result.shape.ndims)
            expected = math_ops.matmul(x_broadcast,
                                       y,
                                       transpose_a=True,
                                       transpose_b=True)
            self.assertAllClose(expected.eval(),
                                result.eval(feed_dict={
                                    x_ph: x,
                                    y_ph: y
                                }))
    def test_basic_statistics_no_latent_variance(self):
        batch_shape = [4, 3]
        num_timesteps = 10
        num_features = 2
        drift_scale = 0.

        design_matrix = self._build_placeholder(
            np.random.randn(*(batch_shape + [num_timesteps, num_features])))

        initial_state_loc = self._build_placeholder(
            np.random.randn(*(batch_shape + [num_features])))
        initial_state_scale = tf.zeros_like(initial_state_loc)
        initial_state_prior = tfd.MultivariateNormalDiag(
            loc=initial_state_loc, scale_diag=initial_state_scale)

        ssm = DynamicLinearRegressionStateSpaceModel(
            num_timesteps=num_timesteps,
            design_matrix=design_matrix,
            drift_scale=drift_scale,
            initial_state_prior=initial_state_prior)

        predicted_time_series = linear_operator_util.matmul_with_broadcast(
            design_matrix, initial_state_loc[..., tf.newaxis])

        self.assertAllEqual(self.evaluate(ssm.mean()), predicted_time_series)
        self.assertAllEqual(
            *self.evaluate((ssm.stddev(),
                            tf.zeros_like(predicted_time_series))))
  def operator_and_matrix(
      self, build_info, dtype, use_placeholder,
      ensure_self_adjoint_and_pd=False):
    shape = list(build_info.shape)
    reflection_axis = linear_operator_test_util.random_sign_uniform(
        shape[:-1], minval=1., maxval=2., dtype=dtype)
    # Make sure unit norm.
    reflection_axis = reflection_axis / linalg_ops.norm(
        reflection_axis, axis=-1, keepdims=True)

    lin_op_reflection_axis = reflection_axis

    if use_placeholder:
      lin_op_reflection_axis = array_ops.placeholder_with_default(
          reflection_axis, shape=None)

    operator = householder.LinearOperatorHouseholder(lin_op_reflection_axis)

    mat = reflection_axis[..., array_ops.newaxis]
    matrix = -2 * linear_operator_util.matmul_with_broadcast(
        mat, mat, adjoint_b=True)
    matrix = array_ops.matrix_set_diag(
        matrix, 1. + array_ops.matrix_diag_part(matrix))

    return operator, matrix
 def _test_matmul(self, with_batch):
     for use_placeholder in self._use_placeholder_options:
         for build_info in self._operator_build_infos:
             for dtype in self._dtypes_to_test:
                 for adjoint in self._adjoint_options:
                     for adjoint_arg in self._adjoint_arg_options:
                         with self.test_session(graph=ops.Graph()) as sess:
                             sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
                             operator, mat, feed_dict = self._operator_and_mat_and_feed_dict(
                                 build_info,
                                 dtype,
                                 use_placeholder=use_placeholder)
                             x = self._make_x(operator,
                                              adjoint=adjoint,
                                              with_batch=with_batch)
                             # If adjoint_arg, compute A X^H^H = A X.
                             if adjoint_arg:
                                 op_matmul = operator.matmul(
                                     linalg.adjoint(x),
                                     adjoint=adjoint,
                                     adjoint_arg=adjoint_arg)
                             else:
                                 op_matmul = operator.matmul(
                                     x, adjoint=adjoint)
                             mat_matmul = linear_operator_util.matmul_with_broadcast(
                                 mat, x, adjoint_a=adjoint)
                             if not use_placeholder:
                                 self.assertAllEqual(
                                     op_matmul.get_shape(),
                                     mat_matmul.get_shape())
                             op_matmul_v, mat_matmul_v = sess.run(
                                 [op_matmul, mat_matmul],
                                 feed_dict=feed_dict)
                             self.assertAC(op_matmul_v, mat_matmul_v)
 def _to_dense(self):
   normalized_axis = self.reflection_axis / linalg.norm(
       self.reflection_axis, axis=-1, keepdims=True)
   mat = normalized_axis[..., array_ops.newaxis]
   matrix = -2 * linear_operator_util.matmul_with_broadcast(
       mat, mat, adjoint_b=True)
   return array_ops.matrix_set_diag(
       matrix, 1. + array_ops.matrix_diag_part(matrix))
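
The matrix_set_diag step above forms I - 2 v v^H without materializing an identity matrix: build -2 v v^H, then add 1 to each diagonal entry. A small NumPy sketch of the same construction, also checking that the resulting Householder matrix is its own inverse:

import numpy as np

rng = np.random.RandomState(0)
v = rng.rand(4)
v = v / np.linalg.norm(v)          # unit-norm reflection axis

mat = v[:, np.newaxis]             # column vector, shape (4, 1)
matrix = -2.0 * mat @ mat.T        # -2 v v^T
np.fill_diagonal(matrix, 1.0 + np.diag(matrix))  # add 1 on the diagonal

# Same as forming I - 2 v v^T directly, and H @ H = I since H is a reflection.
assert np.allclose(matrix, np.eye(4) - 2.0 * np.outer(v, v))
assert np.allclose(matrix @ matrix, np.eye(4))
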
Example #10
    def _solve(self, rhs, adjoint=False, adjoint_arg=False):
        if self.base_operator.is_non_singular is False:
            raise ValueError(
                "Solve not implemented unless this is a perturbation of a "
                "non-singular LinearOperator.")
        # The Woodbury formula gives:
        # https://en.wikipedia.org/wiki/Woodbury_matrix_identity
        #   (L + UDV^H)^{-1}
        #   = L^{-1} - L^{-1} U (D^{-1} + V^H L^{-1} U)^{-1} V^H L^{-1}
        #   = L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
        # where C is the capacitance matrix, C := D^{-1} + V^H L^{-1} U
        # Note also that, with ^{-H} being the inverse of the adjoint,
        #   (L + UDV^H)^{-H}
        #   = L^{-H} - L^{-H} V C^{-H} U^H L^{-H}
        l = self.base_operator
        if adjoint:
            v = self.u
            u = self.v
        else:
            v = self.v
            u = self.u

        # L^{-1} rhs
        linv_rhs = l.solve(rhs, adjoint=adjoint, adjoint_arg=adjoint_arg)
        # V^H L^{-1} rhs
        vh_linv_rhs = linear_operator_util.matmul_with_broadcast(
            v, linv_rhs, adjoint_a=True)
        # C^{-1} V^H L^{-1} rhs
        if self._use_cholesky:
            capinv_vh_linv_rhs = linear_operator_util.cholesky_solve_with_broadcast(
                self._chol_capacitance, vh_linv_rhs)
        else:
            capinv_vh_linv_rhs = linear_operator_util.matrix_solve_with_broadcast(
                self._capacitance, vh_linv_rhs, adjoint=adjoint)
        # U C^{-1} V^H L^{-1} rhs
        u_capinv_vh_linv_rhs = linear_operator_util.matmul_with_broadcast(
            u, capinv_vh_linv_rhs)
        # L^{-1} U C^{-1} V^H L^{-1} rhs
        linv_u_capinv_vh_linv_rhs = l.solve(u_capinv_vh_linv_rhs,
                                            adjoint=adjoint)

        # L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
        return linv_rhs - linv_u_capinv_vh_linv_rhs
  def _solve(self, rhs, adjoint=False, adjoint_arg=False):
    if self.base_operator.is_non_singular is False:
      raise ValueError(
          "Solve not implemented unless this is a perturbation of a "
          "non-singular LinearOperator.")
    # The Woodbury formula gives:
    # https://en.wikipedia.org/wiki/Woodbury_matrix_identity
    #   (L + UDV^H)^{-1}
    #   = L^{-1} - L^{-1} U (D^{-1} + V^H L^{-1} U)^{-1} V^H L^{-1}
    #   = L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
    # where C is the capacitance matrix, C := D^{-1} + V^H L^{-1} U
    # Note also that, with ^{-H} being the inverse of the adjoint,
    #   (L + UDV^H)^{-H}
    #   = L^{-H} - L^{-H} V C^{-H} U^H L^{-H}
    l = self.base_operator
    if adjoint:
      v = self.u
      u = self.v
    else:
      v = self.v
      u = self.u

    # L^{-1} rhs
    linv_rhs = l.solve(rhs, adjoint=adjoint, adjoint_arg=adjoint_arg)
    # V^H L^{-1} rhs
    vh_linv_rhs = linear_operator_util.matmul_with_broadcast(
        v, linv_rhs, adjoint_a=True)
    # C^{-1} V^H L^{-1} rhs
    if self._use_cholesky:
      capinv_vh_linv_rhs = linear_operator_util.cholesky_solve_with_broadcast(
          self._chol_capacitance, vh_linv_rhs)
    else:
      capinv_vh_linv_rhs = linear_operator_util.matrix_solve_with_broadcast(
          self._capacitance, vh_linv_rhs, adjoint=adjoint)
    # U C^{-1} V^H L^{-1} rhs
    u_capinv_vh_linv_rhs = linear_operator_util.matmul_with_broadcast(
        u, capinv_vh_linv_rhs)
    # L^{-1} U C^{-1} V^H L^{-1} rhs
    linv_u_capinv_vh_linv_rhs = l.solve(u_capinv_vh_linv_rhs, adjoint=adjoint)

    # L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
    return linv_rhs - linv_u_capinv_vh_linv_rhs
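
The Woodbury algebra in the comments can be checked numerically with dense matrices. A hedged NumPy sketch (dense arrays stand in for the operators; this is not the operator code itself):

import numpy as np

rng = np.random.RandomState(0)
n, k = 5, 2
l = np.eye(n) + 0.1 * rng.rand(n, n)   # well-conditioned base operator L
u = rng.rand(n, k)
v = rng.rand(n, k)
d = np.diag(rng.rand(k) + 1.0)         # diagonal D
rhs = rng.rand(n, 1)

# Direct solve of (L + U D V^H) x = rhs.
direct = np.linalg.solve(l + u @ d @ v.T, rhs)

# Woodbury: x = L^{-1} rhs - L^{-1} U C^{-1} V^H L^{-1} rhs,
# with capacitance C = D^{-1} + V^H L^{-1} U.
linv_rhs = np.linalg.solve(l, rhs)
capacitance = np.linalg.inv(d) + v.T @ np.linalg.solve(l, u)
woodbury = linv_rhs - np.linalg.solve(
    l, u @ np.linalg.solve(capacitance, v.T @ linv_rhs))

assert np.allclose(direct, woodbury)
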
Example #12
    def _matmul(self, x, adjoint=False, adjoint_arg=False):
        u = self.u
        v = self.v
        l = self.base_operator
        d = self.diag_operator

        leading_term = l.matmul(x, adjoint=adjoint, adjoint_arg=adjoint_arg)

        if adjoint:
            uh_x = linear_operator_util.matmul_with_broadcast(
                u, x, adjoint_a=True, adjoint_b=adjoint_arg)
            d_uh_x = d.matmul(uh_x, adjoint=adjoint)
            v_d_uh_x = linear_operator_util.matmul_with_broadcast(v, d_uh_x)
            return leading_term + v_d_uh_x
        else:
            vh_x = linear_operator_util.matmul_with_broadcast(
                v, x, adjoint_a=True, adjoint_b=adjoint_arg)
            d_vh_x = d.matmul(vh_x, adjoint=adjoint)
            u_d_vh_x = linear_operator_util.matmul_with_broadcast(u, d_vh_x)
            return leading_term + u_d_vh_x
  def _matmul(self, x, adjoint=False, adjoint_arg=False):
    u = self.u
    v = self.v
    l = self.base_operator
    d = self.diag_operator

    leading_term = l.matmul(x, adjoint=adjoint, adjoint_arg=adjoint_arg)

    if adjoint:
      uh_x = linear_operator_util.matmul_with_broadcast(
          u, x, adjoint_a=True, adjoint_b=adjoint_arg)
      d_uh_x = d.matmul(uh_x, adjoint=adjoint)
      v_d_uh_x = linear_operator_util.matmul_with_broadcast(
          v, d_uh_x)
      return leading_term + v_d_uh_x
    else:
      vh_x = linear_operator_util.matmul_with_broadcast(
          v, x, adjoint_a=True, adjoint_b=adjoint_arg)
      d_vh_x = d.matmul(vh_x, adjoint=adjoint)
      u_d_vh_x = linear_operator_util.matmul_with_broadcast(u, d_vh_x)
      return leading_term + u_d_vh_x
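
A quick dense sanity check of the product computed here, (L + U D V^H) x = L x + U D (V^H x), sketched in NumPy with real matrices (illustrative only):

import numpy as np

rng = np.random.RandomState(0)
n, k, r = 5, 2, 3
l = rng.rand(n, n)            # base operator L
u = rng.rand(n, k)
v = rng.rand(n, k)
d = np.diag(rng.rand(k))      # diagonal D
x = rng.rand(n, r)

# Matrix-free evaluation: leading term plus the low-rank correction.
vh_x = v.T @ x
u_d_vh_x = u @ (d @ vh_x)
matrix_free = l @ x + u_d_vh_x

# Dense evaluation of the same operator.
dense = (l + u @ d @ v.T) @ x

assert np.allclose(matrix_free, dense)
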
  def test_static_dims_broadcast(self):
    # batch_shape = [2]
    # for each batch member, we have a 1x3 matrix times a 3x7 matrix ==> 1x7
    x = rng.rand(2, 1, 3)
    y = rng.rand(3, 7)
    y_broadcast = y + np.zeros((2, 1, 1))

    with self.cached_session():
      result = linear_operator_util.matmul_with_broadcast(x, y)
      self.assertAllEqual((2, 1, 7), result.get_shape())
      expected = math_ops.matmul(x, y_broadcast)
      self.assertAllEqual(expected.eval(), result.eval())
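
The expected shape arithmetic mirrors NumPy's matmul broadcasting; a minimal sketch with the same shapes (not part of the test):

import numpy as np

rng = np.random.RandomState(0)
x = rng.rand(2, 1, 3)    # batch shape (2,), 1x3 matrices
y = rng.rand(3, 7)       # unbatched 3x7 matrix

# np.matmul broadcasts the missing batch dimension of y, giving shape (2, 1, 7).
result = np.matmul(x, y)
expected = np.matmul(x, y + np.zeros((2, 1, 1)))  # explicit broadcast of y

assert result.shape == (2, 1, 7)
assert np.allclose(result, expected)
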
Example #15
    def test_simple_regression_correctness(self):
        # Verify that optimizing a simple linear regression by gradient descent
        # recovers the known-correct weights.
        batch_shape = [4, 3]
        num_timesteps = 10
        num_features = 2
        design_matrix = self._build_placeholder(
            np.random.randn(*(batch_shape + [num_timesteps, num_features])))

        true_weights = self._build_placeholder([4., -3.])
        predicted_time_series = linear_operator_util.matmul_with_broadcast(
            design_matrix, true_weights[..., tf.newaxis])

        linear_regression = LinearRegression(
            design_matrix=design_matrix,
            weights_prior=tfd.Independent(tfd.Cauchy(
                loc=self._build_placeholder(np.zeros([num_features])),
                scale=self._build_placeholder(np.ones([num_features]))),
                                          reinterpreted_batch_ndims=1))
        observation_noise_scale_prior = tfd.LogNormal(
            loc=self._build_placeholder(-2),
            scale=self._build_placeholder(0.1))
        model = Sum(
            components=[linear_regression],
            observation_noise_scale_prior=observation_noise_scale_prior)

        learnable_weights = tf.compat.v2.Variable(
            tf.zeros([num_features], dtype=true_weights.dtype))

        def build_loss():
            learnable_ssm = model.make_state_space_model(
                num_timesteps=num_timesteps,
                param_vals={
                    "LinearRegression/_weights": learnable_weights,
                    "observation_noise_scale":
                    observation_noise_scale_prior.mode()
                })
            return -learnable_ssm.log_prob(predicted_time_series)

        # We provide graph- and eager-mode optimization for TF 2.0 compatibility.
        num_train_steps = 80
        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.1)
        if tf.executing_eagerly():
            for _ in range(num_train_steps):
                optimizer.minimize(build_loss)
        else:
            train_op = optimizer.minimize(build_loss())
            self.evaluate(tf.compat.v1.global_variables_initializer())
            for _ in range(num_train_steps):
                _ = self.evaluate(train_op)
        self.assertAllClose(*self.evaluate((true_weights, learnable_weights)),
                            atol=0.2)
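
For intuition only: when the observed series is exactly design_matrix @ weights with no noise, ordinary least squares recovers the generating weights, which is the solution the gradient-descent fit above converges toward. A single-batch NumPy sketch (the test itself exercises the batched TFP model, not this shortcut):

import numpy as np

rng = np.random.RandomState(0)
num_timesteps, num_features = 10, 2
design_matrix = rng.randn(num_timesteps, num_features)
true_weights = np.array([4.0, -3.0])

# Noise-free observations generated by the linear model.
series = design_matrix @ true_weights

# Ordinary least squares recovers the generating weights (up to rounding).
recovered, *_ = np.linalg.lstsq(design_matrix, series, rcond=None)
assert np.allclose(recovered, true_weights)
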
  def _make_capacitance(self):
    # C := D^{-1} + V^H L^{-1} U
    # which is sometimes known as the "capacitance" matrix.

    # L^{-1} U
    linv_u = self.base_operator.solve(self.u)
    # V^H L^{-1} U
    vh_linv_u = linear_operator_util.matmul_with_broadcast(
        self.v, linv_u, adjoint_a=True)

    # D^{-1} + V^H L^{-1} U
    capacitance = self._diag_inv_operator.add_to_tensor(vh_linv_u)
    return capacitance
Example #17
    def _make_capacitance(self):
        # C := D^{-1} + V^H L^{-1} U
        # which is sometimes known as the "capacitance" matrix.

        # L^{-1} U
        linv_u = self.base_operator.solve(self.u)
        # V^H L^{-1} U
        vh_linv_u = linear_operator_util.matmul_with_broadcast(self.v,
                                                               linv_u,
                                                               adjoint_a=True)

        # D^{-1} + V^H L^{-1} U
        capacitance = self._diag_inv_operator.add_to_tensor(vh_linv_u)
        return capacitance
  def test_dynamic_dims_broadcast_32bit(self):
    # batch_shape = [2]
    # for each batch member, we have a 1x3 matrix times a 3x7 matrix ==> 1x7
    x = rng.rand(2, 1, 3)
    y = rng.rand(3, 7)
    y_broadcast = y + np.zeros((2, 1, 1))

    x_ph = array_ops.placeholder(dtypes.float64)
    y_ph = array_ops.placeholder(dtypes.float64)

    with self.test_session() as sess:
      result, expected = sess.run(
          [linear_operator_util.matmul_with_broadcast(x_ph, y_ph),
           math_ops.matmul(x, y_broadcast)],
          feed_dict={x_ph: x, y_ph: y})
      self.assertAllEqual(expected, result)
  def test_static_dims_broadcast_y_has_extra_dims(self):
    # Since the second arg has extra dims, and the domain dim of the first arg
    # is larger than the number of linear equations, code will "flip" the extra
    # dims of the first arg to the far right, making extra linear equations
    # (then call the matrix function, then flip back).
    # We have verified that this optimization indeed happens.  How? We stepped
    # through with a debugger.
    x = rng.rand(5, 7)
    y = rng.rand(2, 3, 7, 5)
    x_broadcast = x + np.zeros((2, 3, 5, 7))

    with self.cached_session():
      result = linear_operator_util.matmul_with_broadcast(x, y)
      self.assertAllEqual((2, 3, 5, 5), result.get_shape())
      expected = math_ops.matmul(x_broadcast, y)
      self.assertAllClose(expected.eval(), result.eval())
Example #20
    def test_static_dims_broadcast_y_has_extra_dims(self):
        # Since the second arg has extra dims, and the domain dim of the first arg
        # is larger than the number of linear equations, code will "flip" the extra
        # dims of the first arg to the far right, making extra linear equations
        # (then call the matrix function, then flip back).
        # We have verified that this optimization indeed happens.  How? We stepped
        # through with a debugger.
        x = rng.rand(5, 7)
        y = rng.rand(2, 3, 7, 5)
        x_broadcast = x + np.zeros((2, 3, 5, 7))

        with self.cached_session():
            result = linear_operator_util.matmul_with_broadcast(x, y)
            self.assertAllEqual((2, 3, 5, 5), result.get_shape())
            expected = math_ops.matmul(x_broadcast, y)
            self.assertAllClose(expected.eval(), self.evaluate(result))
    def benchmarkBatchMatMulBroadcast(self):
        for (a_shape, b_shape) in self.shape_pairs:
            with compat.forward_compatibility_horizon(2019, 4, 26):
                with ops.Graph().as_default(), \
                    session.Session(config=benchmark.benchmark_config()) as sess, \
                    ops.device("/cpu:0"):
                    matrix_a = variables.Variable(
                        GetRandomNormalInput(a_shape, np.float32))
                    matrix_b = variables.Variable(
                        GetRandomNormalInput(b_shape, np.float32))
                    variables.global_variables_initializer().run()

                    # Use batch matmul op's internal broadcasting.
                    self.run_op_benchmark(sess,
                                          math_ops.matmul(matrix_a, matrix_b),
                                          min_iters=50,
                                          name="batch_matmul_cpu_{}_{}".format(
                                              a_shape, b_shape))

                    # Manually broadcast the input matrices using the broadcast_to op.
                    broadcasted_batch_shape = array_ops.broadcast_static_shape(
                        matrix_a.shape[:-2], matrix_b.shape[:-2])
                    broadcasted_a_shape = broadcasted_batch_shape.concatenate(
                        matrix_a.shape[-2:])
                    broadcasted_b_shape = broadcasted_batch_shape.concatenate(
                        matrix_b.shape[-2:])
                    self.run_op_benchmark(
                        sess,
                        math_ops.matmul(
                            array_ops.broadcast_to(matrix_a,
                                                   broadcasted_a_shape),
                            array_ops.broadcast_to(matrix_b,
                                                   broadcasted_b_shape)),
                        min_iters=50,
                        name="batch_matmul_manual_broadcast_cpu_{}_{}".format(
                            a_shape, b_shape))

                    # Use linear_operator_util.matmul_with_broadcast.
                    name_template = (
                        "batch_matmul_manual_broadcast_with_linear_operator_util"
                        "_cpu_{}_{}")
                    self.run_op_benchmark(
                        sess,
                        linear_operator_util.matmul_with_broadcast(
                            matrix_a, matrix_b),
                        min_iters=50,
                        name=name_template.format(a_shape, b_shape))
  def benchmarkBatchMatMulBroadcast(self):
    for (a_shape, b_shape) in self.shape_pairs:
      with compat.forward_compatibility_horizon(2019, 4, 19):
        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/cpu:0"):
          matrix_a = variables.Variable(
              GetRandomNormalInput(a_shape, np.float32))
          matrix_b = variables.Variable(
              GetRandomNormalInput(b_shape, np.float32))
          variables.global_variables_initializer().run()

          # Use batch matmul op's internal broadcasting.
          self.run_op_benchmark(
              sess,
              math_ops.matmul(matrix_a, matrix_b),
              min_iters=50,
              name="batch_matmul_cpu_{}_{}".format(a_shape, b_shape))

          # Manually broadcast the input matrices using the broadcast_to op.
          broadcasted_batch_shape = array_ops.broadcast_static_shape(
              matrix_a.shape[:-2], matrix_b.shape[:-2])
          broadcasted_a_shape = broadcasted_batch_shape.concatenate(
              matrix_a.shape[-2:])
          broadcasted_b_shape = broadcasted_batch_shape.concatenate(
              matrix_b.shape[-2:])
          self.run_op_benchmark(
              sess,
              math_ops.matmul(
                  array_ops.broadcast_to(matrix_a, broadcasted_a_shape),
                  array_ops.broadcast_to(matrix_b, broadcasted_b_shape)),
              min_iters=50,
              name="batch_matmul_manual_broadcast_cpu_{}_{}".format(
                  a_shape, b_shape))

          # Use linear_operator_util.matmul_with_broadcast.
          name_template = (
              "batch_matmul_manual_broadcast_with_linear_operator_util"
              "_cpu_{}_{}"
          )
          self.run_op_benchmark(
              sess,
              linear_operator_util.matmul_with_broadcast(matrix_a, matrix_b),
              min_iters=50,
              name=name_template.format(a_shape, b_shape))
Example #23
    def test_simple_regression_correctness(self):
        # Verify that optimizing a simple linear regression by gradient descent
        # recovers the known-correct weights.
        batch_shape = [4, 3]
        num_timesteps = 10
        num_features = 2
        design_matrix = self._build_placeholder(
            np.random.randn(*(batch_shape + [num_timesteps, num_features])))

        true_weights = self._build_placeholder([4., -3.])
        predicted_time_series = linear_operator_util.matmul_with_broadcast(
            design_matrix, true_weights[..., tf.newaxis])

        linear_regression = LinearRegression(
            design_matrix=design_matrix,
            weights_prior=tfd.Independent(tfd.Cauchy(
                loc=self._build_placeholder(np.zeros([num_features])),
                scale=self._build_placeholder(np.ones([num_features]))),
                                          reinterpreted_batch_ndims=1))
        observation_noise_scale_prior = tfd.LogNormal(
            loc=self._build_placeholder(-2),
            scale=self._build_placeholder(0.1))
        model = Sum(
            components=[linear_regression],
            observation_noise_scale_prior=observation_noise_scale_prior)

        learnable_weights = tf.Variable(
            tf.zeros([num_features], dtype=true_weights.dtype))
        learnable_ssm = model.make_state_space_model(
            num_timesteps=num_timesteps,
            param_vals={
                "LinearRegression/_weights": learnable_weights,
                "observation_noise_scale":
                observation_noise_scale_prior.mode()
            })

        loss = -learnable_ssm.log_prob(predicted_time_series)
        train_op = tf.train.AdamOptimizer(0.1).minimize(loss)
        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            for _ in range(80):
                _ = sess.run(train_op)
            self.assertAllClose(*sess.run((true_weights, learnable_weights)),
                                atol=0.2)
Example #24
    def test_dynamic_dims_broadcast_64bit(self):
        # batch_shape = [2]
        # for each batch member, we have a 1x3 matrix times a 3x7 matrix ==> 1x7
        x = rng.rand(2, 1, 3)
        y = rng.rand(3, 7)
        y_broadcast = y + np.zeros((2, 1, 1))

        x_ph = array_ops.placeholder(dtypes.float64)
        y_ph = array_ops.placeholder(dtypes.float64)

        with self.cached_session() as sess:
            result, expected = sess.run(
                [linear_operator_util.matmul_with_broadcast(x_ph, y_ph),
                 math_ops.matmul(x, y_broadcast)],
                feed_dict={x_ph: x, y_ph: y})
            self.assertAllClose(expected, result)
Example #25
    def test_basic_statistics(self):
        # Verify that this model constructs a distribution with mean
        # `matmul(design_matrix, weights)` and stddev 0.
        batch_shape = [4, 3]
        num_timesteps = 10
        num_features = 2
        design_matrix = self._build_placeholder(
            np.random.randn(*(batch_shape + [num_timesteps, num_features])))

        linear_regression = LinearRegression(design_matrix=design_matrix)
        true_weights = self._build_placeholder(
            np.random.randn(*(batch_shape + [num_features])))
        predicted_time_series = linear_operator_util.matmul_with_broadcast(
            design_matrix, true_weights[..., tf.newaxis])

        ssm = linear_regression.make_state_space_model(
            num_timesteps=num_timesteps, param_vals={"weights": true_weights})
        self.assertAllEqual(self.evaluate(ssm.mean()), predicted_time_series)
        self.assertAllEqual(
            *self.evaluate((ssm.stddev(),
                            tf.zeros_like(predicted_time_series))))
  def _matmul(self, x, adjoint=False, adjoint_arg=False):
    # Given a vector `v`, we would like to reflect `x` about the hyperplane
    # orthogonal to `v` going through the origin.  We first project `x` to `v`
    # to get v * dot(v, x) / dot(v, v).  After we project, we can reflect the
    # projection about the hyperplane by flipping sign to get
    # -v * dot(v, x) / dot(v, v).  Finally, we can add back the component
    # that is orthogonal to v. This is invariant under reflection, since the
    # whole hyperplane is invariant. This component is equal to x - v * dot(v,
    # x) / dot(v, v), giving the formula x - 2 * v * dot(v, x) / dot(v, v)
    # for the reflection.

    # Note that because this is a reflection, it lies in O(n) (for real vector
    # spaces) or U(n) (for complex vector spaces), and thus is its own adjoint.
    x = linalg.adjoint(x) if adjoint_arg else x
    normalized_axis = self.reflection_axis / linalg.norm(
        self.reflection_axis, axis=-1, keepdims=True)
    mat = normalized_axis[..., array_ops.newaxis]
    x_dot_normalized_v = linear_operator_util.matmul_with_broadcast(
        mat, x, adjoint_a=True)

    return x - 2 * mat * x_dot_normalized_v
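
The projection argument in the comment yields the matrix-free formula x - 2 v (v^H x) for unit-norm v. A short NumPy sketch checking it against applying the dense Householder matrix (illustration only):

import numpy as np

rng = np.random.RandomState(0)
v = rng.rand(5)
v = v / np.linalg.norm(v)      # unit-norm reflection axis
x = rng.rand(5, 3)             # a few vectors to reflect, as columns

# Matrix-free reflection: subtract twice the projection onto v.
reflected = x - 2.0 * np.outer(v, v @ x)

# Dense Householder matrix applied to the same vectors.
h = np.eye(5) - 2.0 * np.outer(v, v)
assert np.allclose(reflected, h @ x)
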
Example #27
 def _forward(self, x):
     return self._Q_operator.matvec(
         linalg_util.matmul_with_broadcast(self._R, x[..., tf.newaxis])[..., 0])
 def _matmul(self, x, adjoint=False, adjoint_arg=False):
   return linear_operator_util.matmul_with_broadcast(
       self._tril, x, adjoint_a=adjoint, adjoint_b=adjoint_arg)
Example #29
 def _forward(self, x):
     w = lu_reconstruct(lower_upper=self.lower_upper,
                        perm=self.permutation,
                        validate_args=self.validate_args)
     return linear_operator_util.matmul_with_broadcast(
         w, x[..., tf.newaxis])[..., 0]
Example #30
 def _matmul(self, x, adjoint=False, adjoint_arg=False):
     return linear_operator_util.matmul_with_broadcast(
         self._tril, x, adjoint_a=adjoint, adjoint_b=adjoint_arg)
Example #31
 def _matmul_right(self, x, adjoint=False, adjoint_arg=False):
   return lou.matmul_with_broadcast(
       x, self._matrix, adjoint_a=adjoint_arg, adjoint_b=adjoint)