def _lower_triangular_mask(shape):
  """Returns a boolean mask that is True on and below the main diagonal.

  The mask has the given `shape`; the triangle is taken over the last two
  dimensions.

  Args:
    shape: Shape of the mask to create.

  Returns:
    A boolean tensor of shape `shape`.
  """

  def _positions_along(axis):
    # A running sum of ones gives the 1-based position along `axis`.
    unit = array_ops.ones(shape=shape, dtype=dtypes.int32)
    return math_ops.cumsum(unit, axis=axis)

  rows = _positions_along(-2)
  cols = _positions_along(-1)
  # An entry belongs to the lower triangle when its row is at or past its
  # column.
  return math_ops.greater_equal(rows, cols)
def testClusterSpecPropagationThreeServers2Graphs(self):
  """Boots 3 servers, creates 2 sessions, ensures appropriate operations.

  Two ClusterSpecs are built, both with server2 as the master (task 0):
    1. job 'worker1' with server1 as the worker (task 1)
    2. job 'worker2' with server3 as the worker (task 1)

  Each session gets its own graph with one variable placed on the worker.
  Updates through one session must not be visible through the other.
  """
  server1 = server_lib.Server.create_local_server()
  server2 = server_lib.Server.create_local_server()
  server3 = server_lib.Server.create_local_server()

  def _address(server):
    # ClusterDef tasks hold bare host:port strings, so drop the scheme.
    return server.target[len('grpc://'):]

  cluster_def1 = cluster_pb2.ClusterDef()
  first_job = cluster_def1.job.add()
  first_job.name = 'worker1'
  first_job.tasks[0] = _address(server2)
  first_job.tasks[1] = _address(server1)

  cluster_def2 = cluster_pb2.ClusterDef()
  second_job = cluster_def2.job.add()
  second_job.name = 'worker2'
  second_job.tasks[0] = _address(server2)
  second_job.tasks[1] = _address(server3)

  config1 = config_pb2.ConfigProto(cluster_def=cluster_def1)
  config2 = config_pb2.ConfigProto(cluster_def=cluster_def2)

  with ops.Graph().as_default() as g1:
    with ops.device('/job:worker1/task:1'):
      var1 = variables.Variable(array_ops.zeros([2]), name='var1')
      update_op1 = state_ops.assign_add(
          var1, array_ops.ones([2]), name='var1_assign_add')
      init1 = variables.global_variables_initializer()

  with ops.Graph().as_default() as g2:
    with ops.device('/job:worker2/task:1'):
      var2 = variables.Variable(array_ops.zeros([2]), name='var2')
      update_op2 = state_ops.assign_add(
          var2, array_ops.ones([2]), name='var2_assign_add')
      init2 = variables.global_variables_initializer()

  # Both sessions talk to the same master but carry different cluster defs.
  sess1 = session.Session(server2.target, graph=g1, config=config1)
  sess2 = session.Session(server2.target, graph=g2, config=config2)

  init1.run(session=sess1)
  init2.run(session=sess2)

  zeros = np.zeros([2])
  ones = np.ones([2])

  # Freshly initialized: both variables are zero.
  self.assertAllEqual(zeros, sess1.run(var1))
  self.assertAllEqual(zeros, sess2.run(var2))

  # Updating var1 must leave var2 untouched.
  self.assertAllEqual(ones, sess1.run(update_op1))
  self.assertAllEqual(ones, sess1.run(var1))
  self.assertAllEqual(zeros, sess2.run(var2))

  # Interleave updates on both sides and confirm they stay independent.
  self.assertAllEqual(ones, sess2.run(update_op2))
  self.assertAllEqual(ones + ones, sess1.run(update_op1))
  self.assertAllEqual(ones, sess2.run(var2))
  self.assertAllEqual(ones + ones, sess1.run(var1))
def testShape(self):
  """Checks static shape inference for `random_gamma` outputs."""

  def _static_dims(tensor):
    return tensor.get_shape().as_list()

  # Fully known shape.
  sample = random_ops.random_gamma([150], 2.0)
  self.assertEqual([150], _static_dims(sample))

  sample = random_ops.random_gamma([150], 2.0, beta=[3.0, 4.0])
  self.assertEqual([150, 2], _static_dims(sample))

  sample = random_ops.random_gamma([150], array_ops.ones([1, 2, 3]))
  self.assertEqual([150, 1, 2, 3], _static_dims(sample))

  sample = random_ops.random_gamma([20, 30], array_ops.ones([1, 2, 3]))
  self.assertEqual([20, 30, 1, 2, 3], _static_dims(sample))

  alpha_placeholder = array_ops.placeholder(dtypes.float32, shape=(2,))
  sample = random_ops.random_gamma([123], alpha_placeholder)
  self.assertEqual([123, 2], _static_dims(sample))

  # Partially known shape.
  shape_placeholder = array_ops.placeholder(dtypes.int32, shape=(1,))
  sample = random_ops.random_gamma(shape_placeholder, array_ops.ones([7, 3]))
  self.assertEqual([None, 7, 3], _static_dims(sample))

  shape_placeholder = array_ops.placeholder(dtypes.int32, shape=(3,))
  sample = random_ops.random_gamma(shape_placeholder, array_ops.ones([9, 6]))
  self.assertEqual([None, None, None, 9, 6], _static_dims(sample))

  # Unknown shape.
  sample = random_ops.random_gamma(
      array_ops.placeholder(dtypes.int32),
      array_ops.placeholder(dtypes.float32))
  self.assertIs(None, sample.get_shape().ndims)

  sample = random_ops.random_gamma(
      [50], array_ops.placeholder(dtypes.float32))
  self.assertIs(None, sample.get_shape().ndims)
def benchmarkCudnnLSTMTraining(self):
  """Benchmarks one forward+backward pass of CudnnLSTM for each test config.

  For every entry returned by `self._GetTestConfig()`, builds a CudnnLSTM on
  "/gpu:0" with all-ones inputs and parameters, takes gradients of the outputs
  with respect to the parameters and inputs, and hands the grouped gradient op
  to `self._BenchmarkOp`.
  """
  test_configs = self._GetTestConfig()
  # `.items()` already yields the config for each name, so no second lookup
  # into `test_configs` is needed.
  for config_name, config in test_configs.items():
    num_layers = config["num_layers"]
    num_units = config["num_units"]
    batch_size = config["batch_size"]
    seq_length = config["seq_length"]

    with ops.Graph().as_default(), ops.device("/gpu:0"):
      model = cudnn_rnn_ops.CudnnLSTM(num_layers, num_units, num_units)
      params_size_t = model.params_size()
      input_data = variables.Variable(
          array_ops.ones([seq_length, batch_size, num_units]))
      input_h = variables.Variable(
          array_ops.ones([num_layers, batch_size, num_units]))
      input_c = variables.Variable(
          array_ops.ones([num_layers, batch_size, num_units]))
      # The parameter size is itself a tensor, so the variable's static shape
      # cannot be validated at construction time.
      params = variables.Variable(
          array_ops.ones([params_size_t]), validate_shape=False)
      output, output_h, output_c = model(
          is_training=True,
          input_data=input_data,
          input_h=input_h,
          input_c=input_c,
          params=params)
      all_grads = gradients_impl.gradients(
          [output, output_h, output_c],
          [params, input_data, input_h, input_c])
      training_op = control_flow_ops.group(*all_grads)
      self._BenchmarkOp(training_op, "cudnn_lstm %s %s" %
                        (config_name, self._GetConfigDesc(config)))
def testCovariance(self):
  """Checks `covariance()` of VectorExponentialDiag for three scale setups."""
  with self.test_session():
    # Default scale: covariance is the identity.
    dist = ds.VectorExponentialDiag(
        loc=array_ops.ones([2, 3], dtype=dtypes.float32))
    identity = np.diag(np.ones([3], dtype=np.float32))
    self.assertAllClose(identity, dist.covariance().eval())

    # Batched scaled-identity: covariance is the squared multiplier on the
    # diagonal.
    dist = ds.VectorExponentialDiag(
        loc=array_ops.ones([3], dtype=dtypes.float32),
        scale_identity_multiplier=[3., 2.])
    self.assertAllEqual([2], dist.batch_shape)
    self.assertAllEqual([3], dist.event_shape)
    expected = np.array([[[3., 0, 0],
                          [0, 3, 0],
                          [0, 0, 3]],
                         [[2, 0, 0],
                          [0, 2, 0],
                          [0, 0, 2]]])**2.
    self.assertAllClose(expected, dist.covariance().eval())

    # Batched diagonal scale: covariance is the squared diagonal.
    dist = ds.VectorExponentialDiag(
        loc=array_ops.ones([3], dtype=dtypes.float32),
        scale_diag=[[3., 2, 1], [4, 5, 6]])
    self.assertAllEqual([2], dist.batch_shape)
    self.assertAllEqual([3], dist.event_shape)
    expected = np.array([[[3., 0, 0],
                          [0, 2, 0],
                          [0, 0, 1]],
                         [[4, 0, 0],
                          [0, 5, 0],
                          [0, 0, 6]]])**2.
    self.assertAllClose(expected, dist.covariance().eval())
def testRejectionDataListInput(self):
  """Checks that `stratified_sample` accepts a list of data tensors.

  Feeds three differently shaped tensors plus a scalar label and verifies
  that the result is a list of the same length together with a label tensor,
  and that evaluating them yields one value per input plus the label.
  """
  batch_size = 20
  val_input_batch = [
      array_ops.zeros([2, 3, 4]),
      array_ops.ones([2, 4]),
      array_ops.ones(2) * 3
  ]
  lbl_input_batch = array_ops.ones([], dtype=dtypes.int32)
  probs = np.array([0, 1, 0, 0, 0])
  val_list, lbls = sampling_ops.stratified_sample(
      val_input_batch,
      lbl_input_batch,
      probs,
      batch_size,
      init_probs=[0, 1, 0, 0, 0])

  # Check output types and list length. `assertIsInstance` reports the
  # offending type on failure instead of a bare "False is not true".
  self.assertIsInstance(val_list, list)
  self.assertEqual(len(val_list), len(val_input_batch))
  self.assertIsInstance(lbls, ops.Tensor)

  with self.test_session() as sess:
    coord = coordinator.Coordinator()
    threads = queue_runner_impl.start_queue_runners(coord=coord)

    out = sess.run(val_list + [lbls])

    coord.request_stop()
    coord.join(threads)

  # Check the number of evaluated outputs: one per data tensor plus the label.
  self.assertEqual(len(out), len(val_input_batch) + 1)
def test_noise_decreasing(self):
  """Checks that `_exogenous_noise_decreasing` shrinks every state variance.

  Runs for float32 and float64, starting from an identity covariance over a
  5-dimensional state.
  """
  state_size = 5
  for dtype in [dtypes.float32, dtypes.float64]:
    with variable_scope.variable_scope(dtype.name):
      model = RandomStateSpaceModel(
          state_dimension=5,
          state_noise_dimension=4,
          configuration=state_space_model.StateSpaceModelConfiguration(
              dtype=dtype, num_features=1))
      model.initialize_graph()
      covariance_before = array_ops.diag(
          array_ops.ones(shape=[5], dtype=dtype))
      initial_state = [
          -array_ops.ones(shape=[1, 5], dtype=dtype),
          covariance_before[None],
          [0]
      ]
      _, covariance_after, _ = model._exogenous_noise_decreasing(
          current_times=[[1]],
          exogenous_values=constant_op.constant([[-2.]], dtype=dtype),
          state=initial_state)
      with self.cached_session() as session:
        variables.global_variables_initializer().run()
        after_value, before_value = session.run(
            [covariance_after[0], covariance_before])
        variances_after = numpy.diag(after_value)
        variances_before = numpy.diag(before_value)
        for index in range(state_size):
          self.assertLess(variances_after[index], variances_before[index])
def test_mixing_eager_and_graph_tensors(self):
  """Checks that combining a graph tensor with an eager tensor raises."""
  # Built inside an explicit graph, so this is a graph tensor.
  with ops.Graph().as_default():
    graph_tensor = array_ops.ones((3, 3))
  # Built outside the graph context; asserted below to be an EagerTensor.
  eager_tensor = array_ops.ones((3, 3))
  self.assertIsInstance(eager_tensor, ops.EagerTensor)
  with self.assertRaisesRegexp(TypeError, 'Graph tensors'):
    math_ops.matmul(graph_tensor, eager_tensor)
def testAcceptsTensor(self):
  """Checks that `scalar_mul` on a dense tensor equals plain multiplication."""
  ones_matrix = array_ops.ones([10, 10])
  scaled = math_ops.scalar_mul(3, ones_matrix)
  reference = array_ops.ones([10, 10]) * 3

  with self.test_session(use_gpu=True):
    reference_value = reference.eval()
    scaled_value = scaled.eval()
    self.assertAllEqual(reference_value, scaled_value)
def testRegisterBlocks(self):
  """Checks that each `register_*` call adds one block to the collection.

  Registers fully connected, conv2d and generic layers, each once with the
  default approximation (explicitly "full" for generic) and once with the
  diagonal approximation, then expects six blocks.
  """
  with ops.Graph().as_default():
    random_seed.set_random_seed(200)
    collection = layer_collection.LayerCollection()

    # Fully connected: default approximation, then diagonal.
    collection.register_fully_connected(
        array_ops.constant(1), array_ops.constant(2), array_ops.constant(3))
    collection.register_fully_connected(
        array_ops.constant(1),
        array_ops.constant(2),
        array_ops.constant(3),
        approx=layer_collection.APPROX_DIAGONAL_NAME)

    # Conv2d: default approximation, then diagonal.
    collection.register_conv2d(
        array_ops.constant(4), [1, 1, 1, 1], 'SAME',
        array_ops.ones((1, 1, 1, 1)), array_ops.constant(3))
    collection.register_conv2d(
        array_ops.constant(4), [1, 1, 1, 1],
        'SAME',
        array_ops.ones((1, 1, 1, 1)),
        array_ops.constant(3),
        approx=layer_collection.APPROX_DIAGONAL_NAME)

    # Generic: full approximation, then diagonal.
    collection.register_generic(
        array_ops.constant(5), 16, approx=layer_collection.APPROX_FULL_NAME)
    collection.register_generic(
        array_ops.constant(6),
        16,
        approx=layer_collection.APPROX_DIAGONAL_NAME)

    registered = collection.get_blocks()
    self.assertEqual(6, len(registered))
def test_nested_network_inside_network(self):
  """Checks a Network with dict inputs/outputs nested in another Network.

  The inner network maps {'x1', 'x2'} to {'x1+x2', 'x1*x2'}. The outer
  network adds those two outputs. The outer network is round-tripped through
  `get_config`/`from_config` before being called.
  """
  inner_inputs = {
      'x1': keras.Input(shape=(1,)),
      'x2': keras.Input(shape=(1,))
  }
  inner_pair = [inner_inputs['x1'], inner_inputs['x2']]
  inner_outputs = {
      'x1+x2': keras.layers.Add()(inner_pair),
      'x1*x2': keras.layers.Multiply()(inner_pair)
  }
  inner_network = keras.engine.network.Network(inner_inputs, inner_outputs)

  outer_inputs = [keras.Input(shape=(1,)), keras.Input(shape=(1,))]
  middle = inner_network({'x1': outer_inputs[0], 'x2': outer_inputs[1]})
  outer_outputs = keras.layers.Add()([middle['x1+x2'], middle['x1*x2']])
  network = keras.engine.network.Network(outer_inputs, outer_outputs)

  # Rebuild from config so the serialization path is exercised too.
  network = keras.engine.network.Network.from_config(network.get_config())

  # Computes: `(x1+x2) + (x1*x2)`
  first = array_ops.ones((1, 1), 'float32')
  second = array_ops.ones((1, 1), 'float32')
  result_tensor = network([first, second])
  result = self.evaluate(result_tensor)
  self.assertAllEqual(result, [[3.]])

  output_shape = network.compute_output_shape([(None, 1), (None, 1)])
  self.assertListEqual(output_shape.as_list(), [None, 1])
def testEagerSingleOutputFloat32(self):
  """Checks `eager_py_func` with a single float32 output."""
  with test_util.device(use_gpu=True):
    matrix = array_ops.ones((3, 3), dtype=dtypes.float32)
    column = array_ops.ones((3, 1), dtype=dtypes.float32)
    product = script_ops.eager_py_func(
        matmul, inp=[matrix, column], Tout=dtypes.float32)
    value = self.evaluate(product)
    # Each row of the all-ones 3x3 matrix sums the three ones of the column.
    self.assertAllClose(value, [[3.0], [3.0], [3.0]])
def testAcceptsTensor(self):
  """Checks that `scalar_mul` on a dense tensor equals plain multiplication."""
  ones_matrix = array_ops.ones([10, 10])
  scaled = math_ops.scalar_mul(3, ones_matrix)
  reference = array_ops.ones([10, 10]) * 3

  with test_util.device(use_gpu=True):
    reference_value = self.evaluate(reference)
    scaled_value = self.evaluate(scaled)
    self.assertAllEqual(reference_value, scaled_value)
def _variance(self):
  """Computes the variance, handling the `df <= 2` and `df <= 1` regimes.

  For `df > 2` the value is `scale**2 * df / (df - 2)`. Where `df <= 2` the
  result is `inf`. Where `df <= 1` the result is `NaN` if
  `self.allow_nan_stats` is set; otherwise an assertion on `1 < df` is
  attached to the returned tensor.
  """
  # We need to put the tf.where inside the outer tf.where to ensure we never
  # hit a NaN in the gradient.
  safe_denominator = array_ops.where(
      math_ops.greater(self.df, 2.),
      self.df - 2.,
      array_ops.ones_like(self.df))
  # Abs(scale) superfluous.
  broadcast_ones = array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)
  finite_variance = (
      broadcast_ones * math_ops.square(self.scale) * self.df /
      safe_denominator)
  # When 1 < df <= 2, variance is infinite.
  inf = np.array(np.inf, dtype=self.dtype.as_numpy_dtype())
  result_where_defined = array_ops.where(
      self.df > array_ops.fill(self.batch_shape_tensor(), 2.),
      finite_variance,
      array_ops.fill(self.batch_shape_tensor(), inf, name="inf"))

  if not self.allow_nan_stats:
    # Strict mode: fail at run time instead of returning NaN.
    df_above_one = check_ops.assert_less(
        array_ops.ones([], dtype=self.dtype),
        self.df,
        message="variance not defined for components of df <= 1")
    return control_flow_ops.with_dependencies(
        [df_above_one], result_where_defined)

  nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
  return array_ops.where(
      math_ops.greater(
          self.df,
          array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)),
      result_where_defined,
      array_ops.fill(self.batch_shape_tensor(), nan, name="nan"))
def benchmarkMatrixBandPartOp(self):
  """Benchmarks `matrix_band_part` on CPU, and on GPU when one is available.

  Runs every shape in `self.shapes` against a fixed set of (lower, upper)
  band limits. Benchmarks are reported as
  `matrix_band_part_<device>_<shape>_<limits>`.
  """
  for shape_ in self.shapes:
    for limits in (-1, -1), (-1, 0), (0, -1), (2, 2):
      # One loop body serves both devices. CPU always runs first; the GPU
      # availability check only happens after the CPU run, as before.
      for device in ("cpu", "gpu"):
        if device == "gpu" and not test_lib.is_gpu_available(True):
          continue
        with ops.Graph().as_default(), \
            session.Session() as sess, \
            ops.device("/%s:0" % device):
          matrix = variables.Variable(array_ops.ones(shape_))
          band = array_ops.matrix_band_part(matrix, limits[0], limits[1])
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(band),
              min_iters=10,
              name="matrix_band_part_{device}_{shape}_{limits}".format(
                  device=device, shape=shape_, limits=limits))
def testAdamSparse(self):
  """Checks an Adam update driven by a sparse (IndexedSlices) gradient."""
  with ops.device('/cpu:0'):
    # Create 2-D embedding for 3 objects on CPU because sparse/sliced updates
    # are not implemented on TPU.
    table = resource_variable_ops.ResourceVariable(array_ops.ones([3, 2]))

  with self.test_scope():
    with backprop.GradientTape() as tape:
      looked_up = embedding_ops.embedding_lookup(table, [1])
      total = math_ops.reduce_sum(looked_up)
    sparse_grad = tape.gradient(total, table)
    self.assertIsInstance(sparse_grad, ops.IndexedSlices)
    adam_optimizer = adam.AdamOptimizer(0.1)
    # The gradient application operations will run on CPU because optimizer
    # updates are always collocated with the variable.
    adam_optimizer.apply_gradients([(sparse_grad, table)])

    # This assign_add will run on CPU because when an input to an
    # operation is a resource, this operation is placed on the resource's
    # device by the eager runtime.
    table.assign_add(array_ops.ones([3, 2]))

  # Only row 1 was looked up, so only row 1 reflects the optimizer step.
  expected = [[2.0, 2.0], [1.9, 1.9], [2.0, 2.0]]
  self.assertAllClose(expected, table.numpy())
def testServerDefChanged(self):
  """Update server def, and run ops on new cluster."""
  remote_targets = [
      self._cached_server1_target, self._cached_server2_target
  ]
  expected_product = [[2, 2], [2, 2]]

  # Switch to the alternate job name and run a matmul on its task 1.
  context.set_server_def(
      server_def=get_server_def(
          ALT_JOB_NAME,
          local_server_port=0,
          remote_server_addresses=remote_targets,
          task_index=0))

  with ops.device("job:%s/replica:0/task:1/device:CPU:0" % ALT_JOB_NAME):
    operand = array_ops.ones([2, 2])
  product = math_ops.matmul(operand, operand)
  np.testing.assert_array_equal(expected_product, product.numpy())

  # Set the server def back to JOB_NAME
  context.set_server_def(
      server_def=get_server_def(
          JOB_NAME,
          local_server_port=0,
          remote_server_addresses=remote_targets,
          task_index=0))

  with ops.device("job:%s/replica:0/task:1/device:CPU:0" % JOB_NAME):
    operand = array_ops.ones([2, 2])
  product = math_ops.matmul(operand, operand)
  np.testing.assert_array_equal(expected_product, product.numpy())
def _testOneSimpleInference(self, rnn_mode, num_layers, num_units, input_size,
                            batch_size, seq_length, dir_count, expected,
                            tolerance):
  """Runs one inference pass on all-ones data and checks the summed outputs.

  Builds a model via `self._CreateModel`, feeds all-ones inputs, initial
  states and parameters, sums every output tensor and compares the total with
  `expected`. `tolerance` is used as both atol and rtol. A cell state is
  supplied and summed only when `rnn_mode == "lstm"`.
  """
  model = self._CreateModel(rnn_mode, num_layers, num_units, input_size)
  uses_cell_state = (rnn_mode == "lstm")
  params_size_t = model.params_size()
  state_shape = [num_layers * dir_count, batch_size, num_units]

  input_data = array_ops.ones([seq_length, batch_size, input_size])
  input_h = array_ops.ones(state_shape)
  # The parameter size is itself a tensor, so static shape validation is off.
  params = variables.Variable(
      array_ops.ones([params_size_t]), validate_shape=False)

  model_kwargs = dict(
      input_data=input_data,
      input_h=input_h,
      params=params,
      is_training=False)
  if uses_cell_state:
    model_kwargs["input_c"] = array_ops.ones(state_shape)
    output, output_h, output_c = model(**model_kwargs)
  else:
    output, output_h = model(**model_kwargs)

  output_sum = math_ops.reduce_sum(output)
  output_h_sum = math_ops.reduce_sum(output_h)
  total_sum = output_sum + output_h_sum
  if uses_cell_state:
    output_c_sum = math_ops.reduce_sum(output_c)
    total_sum += output_c_sum

  with self.test_session(use_gpu=True) as sess:
    sess.run(variables.global_variables_initializer())
    fetched = sess.run([total_sum])
    self.assertAllClose(
        fetched[0], expected, atol=tolerance, rtol=tolerance)
def testEagerSingleOutputInt32(self):
  """Checks `eager_py_func` with a single int32 output."""
  matrix = array_ops.ones((3, 3), dtype=dtypes.int32)
  column = array_ops.ones((3, 1), dtype=dtypes.int32)
  product = script_ops.eager_py_func(
      matmul, inp=[matrix, column], Tout=dtypes.int32)
  with self.test_session():
    value = self.evaluate(product)
    # Each row of the all-ones 3x3 matrix sums the three ones of the column.
    self.assertAllEqual(value, [[3], [3], [3]])
def _mode(self):
  """Computes the mode `(a - 1) / (a_b_sum - 2)`.

  The mode is only defined where both `a > 1` and `b > 1`. If
  `self.allow_nan_stats` is set, other components are replaced by `NaN`.
  Otherwise assertions on `1 < a` and `1 < b` are attached to the result.
  """
  mode = (self.a - 1.0) / (self.a_b_sum - 2.0)

  if not self.allow_nan_stats:
    # Strict mode: fail at run time instead of returning NaN.
    return control_flow_ops.with_dependencies(
        [
            check_ops.assert_less(
                array_ops.ones((), dtype=self.dtype),
                self.a,
                message="Mode not defined for components of a <= 1.",
            ),
            check_ops.assert_less(
                array_ops.ones((), dtype=self.dtype),
                self.b,
                message="Mode not defined for components of b <= 1.",
            ),
        ],
        mode,
    )

  nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
  is_defined = math_ops.logical_and(
      math_ops.greater(self.a, 1.0), math_ops.greater(self.b, 1.0))
  return math_ops.select(
      is_defined,
      mode,
      array_ops.fill(self.batch_shape(), nan, name="nan"),
  )
def testDtype(self):
  """Checks dtype, static shape and value of `ones` across dtypes.

  Covers both a constant shape and a shape taken from another tensor, first
  with the default dtype and then with each explicitly requested dtype.
  """

  def _check_ones(tensor, expected_dtype):
    self.assertEqual(tensor.dtype, expected_dtype)
    self.assertEqual([2, 3], tensor.get_shape())
    self.assertAllEqual(tensor.eval(), np.ones([2, 3]))

  with self.test_session():
    d = array_ops.fill([2, 3], 12., name="fill")
    self.assertEqual(d.get_shape(), [2, 3])

    # Test default type for both constant size and dynamic size
    _check_ones(array_ops.ones([2, 3]), dtypes_lib.float32)
    _check_ones(array_ops.ones(array_ops.shape(d)), dtypes_lib.float32)

    # Test explicit type control
    explicit_dtypes = (dtypes_lib.float32, dtypes_lib.float64,
                       dtypes_lib.int32, dtypes_lib.uint8, dtypes_lib.int16,
                       dtypes_lib.int8, dtypes_lib.complex64,
                       dtypes_lib.complex128, dtypes_lib.int64,
                       dtypes_lib.bool)
    for dtype in explicit_dtypes:
      _check_ones(array_ops.ones([2, 3], dtype=dtype), dtype)
      _check_ones(array_ops.ones(array_ops.shape(d), dtype=dtype), dtype)
def testGradientsShape(self):
  """Checks that gamma-sample gradients have the shapes of `alpha`/`beta`."""
  sample_shape = [2, 3]
  alpha = array_ops.ones([2, 2])
  beta = array_ops.ones([1, 2])
  draws = random_ops.random_gamma(sample_shape, alpha, beta, seed=12345)
  gradients = gradients_impl.gradients(draws, [alpha, beta])
  alpha_grad, beta_grad = gradients
  self.assertAllEqual(alpha_grad.shape, alpha.shape)
  self.assertAllEqual(beta_grad.shape, beta.shape)
def testUpdatesCollection(self):
  """Checks that the f1 update op lands in the requested collection."""
  collection_key = '__updates__'
  all_ones = array_ops.ones((10, 1))
  matching_labels = array_ops.ones((10, 1))
  _, f1_update = classification.f1_score(
      predictions=all_ones,
      labels=matching_labels,
      num_thresholds=3,
      updates_collections=[collection_key])
  collected = ops.get_collection(collection_key)
  self.assertListEqual(collected, [f1_update])
def testMetricsCollection(self):
  """Checks that the f1 value tensor lands in the requested collection."""
  collection_key = '__metrics__'
  all_ones = array_ops.ones((10, 1))
  matching_labels = array_ops.ones((10, 1))
  f1_value, _ = classification.f1_score(
      predictions=all_ones,
      labels=matching_labels,
      num_thresholds=3,
      metrics_collections=[collection_key])
  collected = ops.get_collection(collection_key)
  self.assertListEqual(collected, [f1_value])
def testGradientsShapeWithOneSamplePerParameter(self):
  """Checks gradient shapes when the requested sample shape is empty."""
  sample_shape = []
  alpha = array_ops.ones([2, 2])
  beta = array_ops.ones([1, 2])
  draws = random_ops.random_gamma(sample_shape, alpha, beta)
  gradients = gradients_impl.gradients(draws, [alpha, beta])
  alpha_grad, beta_grad = gradients
  self.assertAllEqual(alpha_grad.shape, alpha.shape)
  self.assertAllEqual(beta_grad.shape, beta.shape)
def testEagerArrayOutput(self):
  """Checks `eager_py_func` when the wrapped function returns a list."""
  with test_util.device(use_gpu=True):
    matrix = array_ops.ones((3, 3), dtype=dtypes.float32)
    column = array_ops.ones((3, 1), dtype=dtypes.float32)
    # The callable returns a one-element list, so `Tout` is a list as well.
    wrapped = script_ops.eager_py_func(
        lambda a, x: [matmul(a, x)],
        inp=[matrix, column],
        Tout=[dtypes.float32])
    value = self.evaluate(wrapped)
    self.assertAllEqual(value, [[[3.0], [3.0], [3.0]]])
def test(self):
  """Checks `where` on labeled tensors against a hand-built result."""
  axes = ['x']
  # True for the first three positions, False for the last two.
  condition = core.LabeledTensor(math_ops.range(5) < 3, axes)
  x = core.LabeledTensor(array_ops.ones(5), axes)
  y = core.LabeledTensor(array_ops.zeros(5), axes)
  where_lt = ops.where(condition, x, y)

  expected_values = array_ops.concat(
      [array_ops.ones(3), array_ops.zeros(2)], 0)
  golden_lt = core.LabeledTensor(expected_values, axes)
  self.assertLabeledTensorsEqual(where_lt, golden_lt)
def testSimpleMatmul(self):
  """Basic remote eager execution."""
  first_device = "job:%s/replica:0/task:1/device:CPU:0" % JOB_NAME
  second_device = "job:%s/replica:0/task:2/device:CPU:0" % JOB_NAME

  with ops.device(first_device):
    lhs = array_ops.ones([2, 2])

  # The second operand and the matmul itself are created on task 2.
  with ops.device(second_device):
    rhs = array_ops.ones([2, 2])
    product = math_ops.matmul(lhs, rhs)

  np.testing.assert_array_equal([[2, 2], [2, 2]], product.numpy())
def testConnectToRemoteServer(self):
  """Basic server connection."""
  remote.connect_to_remote_host(self._cached_server1_target)

  remote_device = "job:worker/replica:0/task:1/device:CPU:0"
  with ops.device(remote_device):
    lhs = array_ops.ones([2, 2])
    rhs = array_ops.ones([2, 2])
    product = math_ops.matmul(lhs, rhs)

  np.testing.assert_array_equal([[2, 2], [2, 2]], product.numpy())
def test_stable_global_norm_avoids_overflow(self):
  """Checks that the stable global norm stays finite where the naive one
  overflows.

  The inputs include a tensor of 1e19 values and a `None` entry.
  """
  inputs = [array_ops.ones([4]), array_ops.ones([4, 4]) * 1e19, None]
  naive_norm = clip_ops.global_norm(inputs)
  naive_is_inf = math_ops.is_inf(naive_norm)
  stable_norm = tfgan_losses._numerically_stable_global_norm(inputs)
  stable_is_inf = math_ops.is_inf(stable_norm)

  with self.test_session(use_gpu=True):
    self.assertTrue(naive_is_inf.eval())
    self.assertFalse(stable_is_inf.eval())
def __init__(self,
             df,
             loc=None,
             scale_identity_multiplier=None,
             scale_diag=None,
             scale_tril=None,
             scale_perturb_factor=None,
             scale_perturb_diag=None,
             validate_args=False,
             allow_nan_stats=True,
             name="VectorStudentT"):
  """Instantiates the vector Student's t-distributions on `R^k`.

  The `batch_shape` is the broadcast between `df.batch_shape` and
  `Affine.batch_shape` where `Affine` is constructed from `loc` and
  `scale_*` arguments.

  The `event_shape` is the event shape of `Affine.event_shape`.

  Args:
    df: Floating-point `Tensor`. The degrees of freedom of the
      distribution(s). `df` must contain only positive values.
      Must be scalar if `loc`, `scale_*` imply non-scalar batch_shape or
      must have the same `batch_shape` implied by `loc`, `scale_*`.
    loc: Floating-point `Tensor`. If this is set to `None`, no `loc` is
      applied.
    scale_identity_multiplier: floating point rank 0 `Tensor` representing a
      scaling done to the identity matrix. When
      `scale_identity_multiplier = scale_diag = scale_tril = None` then
      `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is
      added to `scale`.
    scale_diag: Floating-point `Tensor` representing the diagonal matrix.
      `scale_diag` has shape [N1, N2, ..., k], which represents a k x k
      diagonal matrix. When `None` no diagonal term is added to `scale`.
    scale_tril: Floating-point `Tensor` representing the lower triangular
      matrix. `scale_tril` has shape [N1, N2, ..., k, k], which represents a
      k x k lower triangular matrix. When `None` no `scale_tril` term is
      added to `scale`. The upper triangular elements above the diagonal are
      ignored.
    scale_perturb_factor: Floating-point `Tensor` representing factor matrix
      with last two dimensions of shape `(k, r)`. When `None`, no rank-r
      update is added to `scale`.
    scale_perturb_diag: Floating-point `Tensor` representing the diagonal
      matrix. `scale_perturb_diag` has shape [N1, N2, ..., r], which
      represents an r x r Diagonal matrix. When `None` low rank updates will
      take the form `scale_perturb_factor * scale_perturb_factor.T`.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.
  """
  # Must stay the first statement so that only the constructor arguments are
  # captured.
  parameters = locals()
  graph_parents = [
      df, loc, scale_identity_multiplier, scale_diag, scale_tril,
      scale_perturb_factor, scale_perturb_diag
  ]
  with ops.name_scope(name):
    with ops.name_scope("init", values=graph_parents):
      # The shape of the _VectorStudentT distribution is governed by the
      # relationship between df.batch_shape and affine.batch_shape. In
      # pseudocode the basic procedure is:
      #   if df.batch_shape is scalar:
      #     if affine.batch_shape is not scalar:
      #       # broadcast distribution.sample so
      #       # it has affine.batch_shape.
      #     self.batch_shape = affine.batch_shape
      #   else:
      #     if affine.batch_shape is scalar:
      #       # let affine broadcasting do its thing.
      #     self.batch_shape = df.batch_shape
      # All of the above magic is actually handled by TransformedDistribution.
      # Here we really only need to collect the affine.batch_shape and decide
      # what we're going to pass in to TransformedDistribution's
      # (override) batch_shape arg.
      affine_bijector = bijectors.Affine(
          shift=loc,
          scale_identity_multiplier=scale_identity_multiplier,
          scale_diag=scale_diag,
          scale_tril=scale_tril,
          scale_perturb_factor=scale_perturb_factor,
          scale_perturb_diag=scale_perturb_diag,
          validate_args=validate_args)
      # Base distribution: StudentT with scalar zero loc and unit scale; the
      # affine bijector supplies the actual location and scale.
      base_distribution = student_t.StudentT(
          df=df,
          loc=array_ops.zeros([], dtype=affine_bijector.dtype),
          scale=array_ops.ones([], dtype=affine_bijector.dtype))
      affine_batch_shape, override_event_shape = (
          distribution_util.shapes_from_loc_and_scale(
              affine_bijector.shift, affine_bijector.scale))
      empty_shape = constant_op.constant([], dtype=dtypes.int32)
      override_batch_shape = distribution_util.pick_vector(
          base_distribution.is_scalar_batch(),
          affine_batch_shape,
          empty_shape)
      super(_VectorStudentT, self).__init__(
          distribution=base_distribution,
          bijector=affine_bijector,
          batch_shape=override_batch_shape,
          event_shape=override_event_shape,
          validate_args=validate_args,
          name=name)
      self._parameters = parameters
def __call__(self, inputs, state):
  """Returns the inputs unchanged together with an all-ones state.

  Args:
    inputs: Input tensor; its leading dimension sets the first dimension of
      the returned state.
    state: Ignored.

  Returns:
    A pair `(identity(inputs), ones([shape(inputs)[0], self.state_size]))`.
  """
  passthrough = array_ops.identity(inputs)
  leading_dim = array_ops.shape(inputs)[0]
  next_state = array_ops.ones([leading_dim, self.state_size])
  return passthrough, next_state
def decoder_fn(time, cell_state, cell_input, cell_output, context_state):
  """Decoder function used in the `dynamic_rnn_decoder` for inference.

  The main difference between this decoder function and the `decoder_fn` in
  `attention_decoder_fn_train` is how `next_cell_input` is calculated. In
  this decoder function the next input is found by applying an argmax across
  the feature dimension of the decoder output. This is a greedy-search
  approach. (Bahdanau et al., 2014) & (Sutskever et al., 2014) use
  beam-search instead.

  NOTE(review): `name`, `batch_size`, `dtype`, `start_of_sequence_id`,
  `end_of_sequence_id`, `encoder_state`, `num_decoder_symbols`, `embeddings`,
  `attention_keys`, `attention_values`, `attention_construct_fn`,
  `output_fn`, `latent_sample`, `label_embedding`, `maximum_length` and
  `_init_attention` are not defined in this function; presumably they come
  from the enclosing scope. Confirm against the enclosing definition.

  Args:
    time: positive integer constant reflecting the current timestep.
    cell_state: state of RNNCell.
    cell_input: input provided by `dynamic_rnn_decoder`. Must be `None`.
    cell_output: output of RNNCell; `None` on the first call.
    context_state: context state provided by `dynamic_rnn_decoder`.

  Returns:
    A tuple (done, next state, next input, emit output, next context state,
    cell type) where:

    done: A boolean vector marking which sequences produced
      `end_of_sequence_id`. When `time > maximum_length` an all-`True`
      vector is returned instead.
    next state: `cell_state` (replaced by `encoder_state` on the first
      call).
    next input: concatenation along axis 1 of the embedding of the chosen
      id, the attention, `label_embedding` and `latent_sample`.
    emit output: on the first call a zero vector of size
      `num_decoder_symbols`; afterwards the first result of `output_fn`
      applied to the attention output.
    next context state: `context_state`, returned unmodified.
    cell type: on the first call a zero vector of size 3; afterwards the
      second result of `output_fn`.

  Raises:
    ValueError: if cell_input is not None.
  """
  with ops.name_scope(
      name, "attention_decoder_fn_inference",
      [time, cell_state, cell_input, cell_output, context_state]):
    if cell_input is not None:
      raise ValueError("Expected cell_input to be None, but saw: %s" %
                       cell_input)

    is_first_step = cell_output is None
    if is_first_step:
      # invariant that this is time == 0
      token_ids = array_ops.ones(
          [batch_size,], dtype=dtype) * (start_of_sequence_id)
      finished = array_ops.zeros([batch_size,], dtype=dtypes.bool)
      cell_state = encoder_state
      cell_output = array_ops.zeros(
          [num_decoder_symbols], dtype=dtypes.float32)
      cell_input = array_ops.gather(embeddings, token_ids)
      cell_type = array_ops.zeros([3], dtype=dtypes.float32)

      # init attention
      attention = _init_attention(encoder_state)
    else:
      # construct attention
      attention = attention_construct_fn(cell_output, attention_keys,
                                         attention_values)
      cell_output = attention  # batch*2num_units

      # argmax decoder
      cell_output, cell_type = output_fn(
          cell_output, latent_sample, label_embedding)  # logits
      token_ids = math_ops.cast(
          math_ops.argmax(cell_output, 1), dtype=dtype)
      finished = math_ops.equal(token_ids, end_of_sequence_id)
      cell_input = array_ops.gather(embeddings, token_ids)

    # combine cell_input and attention
    next_input = array_ops.concat(
        [cell_input, attention, label_embedding, latent_sample], 1)

    # if time > maxlen, return all true vector
    past_limit = math_ops.greater(time, maximum_length)
    done = control_flow_ops.cond(
        past_limit,
        lambda: array_ops.ones([batch_size,], dtype=dtypes.bool),
        lambda: finished)
    return (done, cell_state, next_input, cell_output, context_state,
            cell_type)
def ones(value):
  """Returns, as a NumPy value, the shape of `array_ops.ones(value)`."""
  tensor = array_ops.ones(value)
  shape_tensor = array_ops.shape(tensor)
  return shape_tensor.numpy()
def testFromOperation(self):
  """Checks `reduce_sum` over axes 0, 2 and 3 of an all-ones tensor."""
  with self.test_scope():
    all_ones = array_ops.ones([3, 100, 2, 2])
    summed = math_ops.reduce_sum(all_ones, axis=[0, 2, 3])
    # Each of the 100 remaining entries sums 3 * 2 * 2 = 12 ones.
    self.assertAllEqual(100 * [12.0], summed)
def __init__(self,
             loc=None,
             scale=None,
             validate_args=False,
             allow_nan_stats=True,
             name="MultivariateNormalLinearOperator"):
  """Construct Multivariate Normal distribution on `R^k`.

  The `batch_shape` is the broadcast shape between `loc` and `scale`
  arguments.

  The `event_shape` is given by last dimension of the matrix implied by
  `scale`. The last dimension of `loc` (if provided) must broadcast with
  this.

  Recall that `covariance = scale @ scale.T`.

  Additional leading dimensions (if any) will index batches.

  Args:
    loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
      implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]`
      where `b >= 0` and `k` is the event size.
    scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape
      `[B1, ..., Bb, k, k]`.
    validate_args: Python `bool`, default `False`. Whether to validate input
      with asserts. If `validate_args` is `False`, and the inputs are
      invalid, correct behavior is not guaranteed.
    allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
      exception if a statistic (e.g. mean/mode/etc...) is undefined for any
      batch member. If `True`, batch members with valid parameters leading
      to undefined statistics will return NaN for this statistic.
    name: The name to give Ops created by the initializer.

  Raises:
    ValueError: if `scale` is unspecified.
    TypeError: if not `scale.dtype.is_floating`
  """
  # Must stay the first statement so that only the constructor arguments are
  # captured.
  parameters = locals()

  if scale is None:
    raise ValueError("Missing required `scale` parameter.")
  if not scale.dtype.is_floating:
    raise TypeError("`scale` parameter must have floating-point dtype.")

  with ops.name_scope(name, values=[loc] + scale.graph_parents):
    # Since expand_dims doesn't preserve constant-ness, we obtain the
    # non-dynamic value if possible.
    if loc is not None:
      loc = ops.convert_to_tensor(loc, name="loc")
    batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
        loc, scale)

  # Base distribution: Normal with scalar zero loc and unit scale; the
  # affine bijector supplies the actual location and scale.
  base_distribution = normal.Normal(
      loc=array_ops.zeros([], dtype=scale.dtype),
      scale=array_ops.ones([], dtype=scale.dtype))
  affine_bijector = bijectors.AffineLinearOperator(
      shift=loc, scale=scale, validate_args=validate_args)
  super(MultivariateNormalLinearOperator, self).__init__(
      distribution=base_distribution,
      bijector=affine_bijector,
      batch_shape=batch_shape,
      event_shape=event_shape,
      validate_args=validate_args,
      name=name)
  self._parameters = parameters
def testDropoutProperties(self):
  """Checks the rate, noise shape and name of a Dropout layer."""
  layer = core_layers.Dropout(0.5, name='dropout')
  self.assertEqual(layer.rate, 0.5)
  self.assertEqual(layer.noise_shape, None)
  # Apply the layer once to a scalar input before reading its name.
  scalar_input = array_ops.ones(())
  layer.apply(scalar_input)
  self.assertEqual(layer.name, 'dropout')
def testScanEmptyTensor(self):
  """Checks `scan` over an empty sequence with a [2, 4] initializer."""
  with self.cached_session():
    empty_sequence = math_ops.range(0)
    scanned = functional_ops.scan(
        lambda x, _: x, empty_sequence, initializer=array_ops.ones([2, 4]))
    static_shape = scanned.get_shape()
    # Zero steps: the leading dimension is 0 and the rest matches the
    # initializer.
    self.assertAllEqual([0, 2, 4], static_shape)
    self.assertAllEqual(scanned.get_shape(), self.evaluate(scanned).shape)
def testScalarSummaryIsPartOfCollectionWithPrint(self):
  """Checks that a printing scalar summary is added to the SUMMARIES
  collection."""
  tensor = array_ops.ones([]) * 3
  name = 'my_score'
  prefix = 'eval'
  op = summaries.add_scalar_summary(tensor, name, prefix, print_summary=True)
  # `assertIn` reports both the op and the collection contents on failure,
  # unlike `assertTrue(op in ...)`.
  self.assertIn(op, ops.get_collection(ops.GraphKeys.SUMMARIES))
def __call__(self, shape, dtype=None, partition_info=None):
    """Returns a tensor of ones with the given shape.

    Args:
      shape: Shape passed through to `array_ops.ones`.
      dtype: Element type; when `None`, `self.dtype` is used instead.
      partition_info: Accepted but not used by this method.

    Returns:
      The result of `array_ops.ones(shape, dtype)`.
    """
    resolved_dtype = self.dtype if dtype is None else dtype
    return array_ops.ones(shape, resolved_dtype)
def value_fn(ctx):
    """Returns ones whose shape is `range(1, replica_id_in_sync_group + 2)`."""
    stop = ctx.replica_id_in_sync_group + 2
    return array_ops.ones(shape=range(1, stop))
def add_leading_unit_dimensions(x, num_dimensions):
    """Reshapes `x` so that `num_dimensions` size-1 axes precede its shape."""
    unit_dims = array_ops.ones([num_dimensions], dtype=dtypes.int32)
    existing_dims = array_ops.shape(x)
    target_shape = array_ops.concat([unit_dims, existing_dims], axis=0)
    return array_ops.reshape(x, target_shape)
def build(self, input_shape):
    """Sets `self.w` to a (3, 4) tensor of ones; `input_shape` is not used."""
    weight_shape = (3, 4)
    self.w = array_ops.ones(shape=weight_shape)
def create_dataset(_):
    """Returns a `(float32 ones, int32 zeros)` pair; the argument is ignored."""
    float_ones = array_ops.ones(2, dtype=dtypes.float32)
    int_zeros = array_ops.zeros((3, 4), dtype=dtypes.int32)
    return (float_ones, int_zeros)
def create_cyclegan_model():
    """Builds a CycleGAN model from the module-level generator/discriminator."""
    model_kwargs = dict(
        data_x=array_ops.zeros([1, 2]),
        data_y=array_ops.ones([1, 2]))
    return train.cyclegan_model(
        generator_model, discriminator_model, **model_kwargs)
def testGraphBuildAssertionFailures(self):
    """Checks that `stratified_sample` rejects malformed arguments.

    Each `assertRaises` block passes one invalid argument combination to
    `sampling_ops.stratified_sample` and expects the stated exception type to
    be raised by the call itself.
    """
    val = [array_ops.zeros([1, 3]), array_ops.ones([1, 5])]
    label = constant_op.constant([1], shape=[1])  # must have batch dimension
    probs = [.2] * 5
    init_probs = [.1, .3, .1, .3, .2]
    batch_size = 16

    # Label must have only batch dimension if enqueue_many is True.
    with self.assertRaises(ValueError):
        sampling_ops.stratified_sample(
            val,
            array_ops.zeros([]),
            probs,
            batch_size,
            init_probs,
            enqueue_many=True)
    with self.assertRaises(ValueError):
        sampling_ops.stratified_sample(
            val,
            array_ops.zeros([1, 1]),
            probs,
            batch_size,
            init_probs,
            enqueue_many=True)

    # Label must not be one-hot.
    with self.assertRaises(ValueError):
        sampling_ops.stratified_sample(val,
                                       constant_op.constant([0, 1, 0, 0, 0]),
                                       probs, batch_size, init_probs)

    # Data must be list, not singleton tensor.
    with self.assertRaises(TypeError):
        sampling_ops.stratified_sample(
            array_ops.zeros([1, 3]), label, probs, batch_size, init_probs)

    # Data must have batch dimension if enqueue_many is True.
    with self.assertRaises(ValueError):
        sampling_ops.stratified_sample(
            val,
            constant_op.constant(1),
            probs,
            batch_size,
            init_probs,
            enqueue_many=True)

    # Batch dimensions on data and labels should be equal.
    with self.assertRaises(ValueError):
        sampling_ops.stratified_sample(
            [array_ops.zeros([2, 1])],
            label,
            probs,
            batch_size,
            init_probs,
            enqueue_many=True)

    # Probabilities must be numpy array, python list, or tensor.
    with self.assertRaises(ValueError):
        sampling_ops.stratified_sample(val, label, 1, batch_size, init_probs)

    # Probabilities shape must be fully defined.
    with self.assertRaises(ValueError):
        sampling_ops.stratified_sample(
            val,
            label,
            array_ops.placeholder(
                dtypes.float32, shape=[None]),
            batch_size,
            init_probs)

    # In the rejection sampling case, make sure that probability lengths are
    # the same.
    with self.assertRaises(ValueError):
        sampling_ops.stratified_sample(
            val, label, [.1] * 10, batch_size, init_probs=[.2] * 5)

    # In the rejection sampling case, make sure that zero initial probability
    # classes also have zero target probability.
    with self.assertRaises(ValueError):
        sampling_ops.stratified_sample(
            val, label, [.2, .4, .4], batch_size, init_probs=[0, .5, .5])
def _training_examples_and_variables():
    """Returns dictionaries for training examples and variables.

    Reads `targets`, `features`, `columns_to_variables`, `weight_column_name`
    and `optimizer` from the enclosing scope.

    Returns:
      A pair `(examples, sdca_variables)`. `examples` holds the sparse and
      dense features together with example labels, weights and ids;
      `sdca_variables` holds the matching sparse and dense weight variables.

    Raises:
      ValueError: if a column has a type that none of the branches below
        handles.
    """
    batch_size = targets.get_shape()[0]

    # Iterate over all feature columns and create appropriate lists for dense
    # and sparse features as well as dense and sparse weights (variables) for
    # SDCA.
    # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables
    # dict as 1-dimensional tensors.
    dense_features, sparse_features, sparse_feature_with_values = [], [], []
    dense_feature_weights = []
    sparse_feature_weights, sparse_feature_with_values_weights = [], []
    # Columns are visited in order of their `key` attribute.
    for column in sorted(columns_to_variables.keys(), key=lambda x: x.key):
        transformed_tensor = features[column]
        if isinstance(column, layers.feature_column._RealValuedColumn):  # pylint: disable=protected-access
            # A real-valued column corresponds to a dense feature in SDCA. A
            # transformed tensor corresponding to a RealValuedColumn has rank 2
            # (its shape is typically [batch_size, column.dimension]) and so it
            # can be passed to SDCA as is.
            dense_features.append(transformed_tensor)
            # For real valued columns, the variables list contains exactly one
            # element.
            dense_feature_weights.append(columns_to_variables[column][0])
        elif isinstance(column, layers.feature_column._BucketizedColumn):  # pylint: disable=protected-access
            # A bucketized column corresponds to a sparse feature in SDCA. The
            # bucketized feature is "sparsified" for SDCA by converting it to a
            # SparseFeatureColumn representing the one-hot encoding of the
            # bucketized feature.
            #
            # TODO(sibyl-vie3Poto): Explore whether it is more efficient to translate a
            # bucketized feature column to a dense feature in SDCA. This will likely
            # depend on the number of buckets.
            dense_bucket_tensor = column._to_dnn_input_layer(
                transformed_tensor)  # pylint: disable=protected-access
            sparse_feature_column = _dense_tensor_to_sparse_feature_column(
                dense_bucket_tensor)
            sparse_feature_with_values.append(sparse_feature_column)
            # For bucketized columns, the variables list contains exactly one
            # element.
            sparse_feature_with_values_weights.append(
                columns_to_variables[column][0])
        elif isinstance(
            column,
            (
                layers.feature_column._CrossedColumn,  # pylint: disable=protected-access
                layers.feature_column._SparseColumn)):  # pylint: disable=protected-access
            # Keep only the first column of `indices` (flattened), pair it with
            # the flattened values, and pass `None` as the third argument.
            sparse_features.append(
                SparseFeatureColumn(
                    array_ops.reshape(
                        array_ops.split(value=transformed_tensor.indices,
                                        num_or_size_splits=2,
                                        axis=1)[0], [-1]),
                    array_ops.reshape(transformed_tensor.values, [-1]), None))
            sparse_feature_weights.append(columns_to_variables[column][0])
        elif isinstance(column, layers.feature_column._WeightedSparseColumn):  # pylint: disable=protected-access
            id_tensor = column.id_tensor(transformed_tensor)
            weight_tensor = column.weight_tensor(transformed_tensor)
            # Same layout as the branch above, but the third argument is the
            # flattened values of the weight tensor.
            sparse_feature_with_values.append(
                SparseFeatureColumn(
                    array_ops.reshape(
                        array_ops.split(value=id_tensor.indices,
                                        num_or_size_splits=2,
                                        axis=1)[0], [-1]),
                    array_ops.reshape(id_tensor.values, [-1]),
                    array_ops.reshape(weight_tensor.values, [-1])))
            sparse_feature_with_values_weights.append(
                columns_to_variables[column][0])
        else:
            raise ValueError(
                "SDCAOptimizer does not support column type {}".format(
                    type(column).__name__))

    # Use the weight column when one is named; otherwise weight every example
    # by one.
    example_weights = array_ops.reshape(
        features[weight_column_name],
        shape=[-1]) if weight_column_name else array_ops.ones([batch_size])
    example_ids = features[optimizer.example_id_column]
    # Features without explicit values are appended after those with values;
    # the weight lists are extended in the same order so they stay aligned.
    sparse_feature_with_values.extend(sparse_features)
    sparse_feature_with_values_weights.extend(sparse_feature_weights)
    examples = dict(sparse_features=sparse_feature_with_values,
                    dense_features=dense_features,
                    example_labels=math_ops.to_float(
                        array_ops.reshape(targets, shape=[-1])),
                    example_weights=example_weights,
                    example_ids=example_ids)
    sdca_variables = dict(
        sparse_features_weights=sparse_feature_with_values_weights,
        dense_features_weights=dense_feature_weights)
    return examples, sdca_variables
def testClusterSpecPropagationThreeServers2Graphs(self):
    """Boots 3 servers, creates 2 sessions, ensures appropriate operations.

    We create 2 clusterspecs:
     1. server2 as the master, server1 as a worker
     2. server2 as the master, server3 as a worker

    We ensure that variables on the workers are independent.
    """
    server1 = server_lib.Server.create_local_server()
    server2 = server_lib.Server.create_local_server()
    server3 = server_lib.Server.create_local_server()
    # Task addresses are the server targets with the 'grpc://' prefix removed.
    cluster_def1 = cluster_pb2.ClusterDef()
    job1 = cluster_def1.job.add()
    job1.name = 'worker1'
    job1.tasks[0] = server2.target[len('grpc://'):]
    job1.tasks[1] = server1.target[len('grpc://'):]

    cluster_def2 = cluster_pb2.ClusterDef()
    job2 = cluster_def2.job.add()
    job2.name = 'worker2'
    job2.tasks[0] = server2.target[len('grpc://'):]
    job2.tasks[1] = server3.target[len('grpc://'):]

    config1 = config_pb2.ConfigProto(cluster_def=cluster_def1)
    config2 = config_pb2.ConfigProto(cluster_def=cluster_def2)

    # Each graph places one variable and its increment op on task 1 of its job.
    with ops.Graph().as_default() as g1:
        with ops.device('/job:worker1/task:1'):
            var1 = variables.Variable(array_ops.zeros([2]), name='var1')
            update_op1 = state_ops.assign_add(var1,
                                              array_ops.ones([2]),
                                              name='var1_assign_add')
            init1 = variables.global_variables_initializer()

    with ops.Graph().as_default() as g2:
        with ops.device('/job:worker2/task:1'):
            var2 = variables.Variable(array_ops.zeros([2]), name='var2')
            update_op2 = state_ops.assign_add(var2,
                                              array_ops.ones([2]),
                                              name='var2_assign_add')
            init2 = variables.global_variables_initializer()

    # Both sessions connect to server2 but use different cluster configs.
    sess1 = session.Session(server2.target, graph=g1, config=config1)
    sess2 = session.Session(server2.target, graph=g2, config=config2)

    init1.run(session=sess1)
    init2.run(session=sess2)

    expected_zeros = np.zeros([2])
    expected_ones = np.ones([2])

    self.assertAllEqual(expected_zeros, sess1.run(var1))
    self.assertAllEqual(expected_zeros, sess2.run(var2))

    # Updating var1 must leave var2 untouched, and vice versa.
    self.assertAllEqual(expected_ones, sess1.run(update_op1))
    self.assertAllEqual(expected_ones, sess1.run(var1))
    self.assertAllEqual(expected_zeros, sess2.run(var2))
    self.assertAllEqual(expected_ones, sess2.run(update_op2))
    self.assertAllEqual(expected_ones + expected_ones, sess1.run(update_op1))
    self.assertAllEqual(expected_ones, sess2.run(var2))
    self.assertAllEqual(expected_ones + expected_ones, sess1.run(var1))
def create_callable_cyclegan_model():
    """Builds a CycleGAN model from fresh `Generator`/`Discriminator` objects."""
    # Instantiate in the same order as the arguments are passed.
    generator_fn = Generator()
    discriminator_fn = Discriminator()
    zeros_x = array_ops.zeros([1, 2])
    ones_y = array_ops.ones([1, 2])
    return train.cyclegan_model(
        generator_fn, discriminator_fn, data_x=zeros_x, data_y=ones_y)
def _ones(self):
    """Returns ones of the batch shape, using the static shape when known."""
    static_batch_shape = self.get_batch_shape()
    if static_batch_shape.is_fully_defined():
        shape = static_batch_shape
    else:
        # Static shape is incomplete; fall back to `self.batch_shape()`.
        shape = self.batch_shape()
    return array_ops.ones(shape, dtype=self.dtype)
def get_callable_cyclegan_model():
    """Returns a `CycleGANModel` tuple built from two callable GAN models."""
    # Build the fields in the same order as the keyword arguments.
    x2y = get_callable_gan_model()
    y2x = get_callable_gan_model()
    rec_x = array_ops.ones([1, 2, 3])
    rec_y = array_ops.zeros([1, 2, 3])
    return namedtuples.CycleGANModel(
        model_x2y=x2y,
        model_y2x=y2x,
        reconstructed_x=rec_x,
        reconstructed_y=rec_y)
def lu_reconstruct(lower_upper, perm, validate_args=False, name=None):
    """Reconstructs one or more matrices from their LU decomposition(s).

    Args:
      lower_upper: `lu` as returned by `tf.linalg.lu`, i.e., if
        `matmul(P, matmul(L, U)) = X` then `lower_upper = L + U - eye`.
      perm: `p` as returned by `tf.linalg.lu`, i.e., if
        `matmul(P, matmul(L, U)) = X` then `perm = argmax(P)`.
      validate_args: Python `bool` indicating whether arguments should be
        checked for correctness.
        Default value: `False` (i.e., don't validate arguments).
      name: Python `str` name given to ops managed by this object.
        Default value: `None` (i.e., 'lu_reconstruct').

    Returns:
      x: The original input to `tf.linalg.lu`, i.e., `x` as in,
        `lu_reconstruct(*tf.linalg.lu(x))`.

    #### Examples

    ```python
    import numpy as np
    import tensorflow as tf
    import tensorflow_probability as tfp

    x = [[[3., 4], [1, 2]],
         [[7., 8], [3, 4]]]
    x_reconstructed = tf.linalg.lu_reconstruct(*tf.linalg.lu(x))
    tf.assert_near(x, x_reconstructed)
    # ==> True
    ```

    """
    with ops.name_scope(name or 'lu_reconstruct'):
        lower_upper = ops.convert_to_tensor(lower_upper,
                                            dtype_hint=dtypes.float32,
                                            name='lower_upper')
        perm = ops.convert_to_tensor(perm, dtype_hint=dtypes.int32, name='perm')

        assertions = lu_reconstruct_assertions(lower_upper, perm, validate_args)
        if assertions:
            # Route both inputs through identity ops that depend on the
            # assertions.
            with ops.control_dependencies(assertions):
                lower_upper = array_ops.identity(lower_upper)
                perm = array_ops.identity(perm)

        shape = array_ops.shape(lower_upper)

        # Lower factor: lower triangle of `lower_upper` with its diagonal set
        # to ones. Upper factor: upper triangle including the diagonal.
        lower = set_diag(band_part(lower_upper, num_lower=-1, num_upper=0),
                         array_ops.ones(shape[:-1], dtype=lower_upper.dtype))
        upper = band_part(lower_upper, num_lower=0, num_upper=-1)
        x = math_ops.matmul(lower, upper)

        if (lower_upper.shape is None or lower_upper.shape.rank is None
                or lower_upper.shape.rank != 2):
            # We either don't know the batch rank or there are >0 batch dims.
            # Flatten the batch dims, gather rows by the inverted permutation
            # for each batch member, then restore the original shape.
            batch_size = math_ops.reduce_prod(shape[:-2])
            d = shape[-1]
            x = array_ops.reshape(x, [batch_size, d, d])
            perm = array_ops.reshape(perm, [batch_size, d])
            perm = map_fn.map_fn(array_ops.invert_permutation, perm)
            batch_indices = array_ops.broadcast_to(
                math_ops.range(batch_size)[:, array_ops.newaxis],
                [batch_size, d])
            x = array_ops.gather_nd(
                x, array_ops.stack([batch_indices, perm], axis=-1))
            x = array_ops.reshape(x, shape)
        else:
            # Single matrix: gather rows by the inverted permutation directly.
            x = array_ops.gather(x, array_ops.invert_permutation(perm))

        # Carry over whatever static shape information the input had.
        x.set_shape(lower_upper.shape)
        return x
def bar():
    """Creates a scalar one, checks its device, and returns it plus one."""
    scalar_one = array_ops.ones([])
    self.assertEqual(expected_device, scalar_one.device)
    incremented = scalar_one + 1
    return incremented
def lu_solve(lower_upper, perm, rhs, validate_args=False, name=None):
    """Solves systems of linear eqns `A X = RHS`, given LU factorizations.

    Note: this function does not verify the implied matrix is actually
    invertible nor is this condition checked even when `validate_args=True`.

    Args:
      lower_upper: `lu` as returned by `tf.linalg.lu`, i.e., if
        `matmul(P, matmul(L, U)) = X` then `lower_upper = L + U - eye`.
      perm: `p` as returned by `tf.linalg.lu`, i.e., if
        `matmul(P, matmul(L, U)) = X` then `perm = argmax(P)`.
      rhs: Matrix-shaped float `Tensor` representing targets for which to
        solve; `A X = RHS`. To handle vector cases, use:
        `lu_solve(..., rhs[..., tf.newaxis])[..., 0]`.
      validate_args: Python `bool` indicating whether arguments should be
        checked for correctness. Note: this function does not verify the
        implied matrix is actually invertible, even when `validate_args=True`.
        Default value: `False` (i.e., don't validate arguments).
      name: Python `str` name given to ops managed by this object.
        Default value: `None` (i.e., 'lu_solve').

    Returns:
      x: The `X` in `A @ X = RHS`.

    #### Examples

    ```python
    import numpy as np
    import tensorflow as tf
    import tensorflow_probability as tfp

    x = [[[1., 2],
          [3, 4]],
         [[7, 8],
          [3, 4]]]
    inv_x = tf.linalg.lu_solve(*tf.linalg.lu(x), rhs=tf.eye(2))
    tf.assert_near(tf.matrix_inverse(x), inv_x)
    # ==> True
    ```

    """

    with ops.name_scope(name or 'lu_solve'):
        lower_upper = ops.convert_to_tensor(lower_upper,
                                            dtype_hint=dtypes.float32,
                                            name='lower_upper')
        perm = ops.convert_to_tensor(perm, dtype_hint=dtypes.int32, name='perm')
        rhs = ops.convert_to_tensor(rhs,
                                    dtype_hint=lower_upper.dtype,
                                    name='rhs')

        assertions = _lu_solve_assertions(lower_upper, perm, rhs, validate_args)
        if assertions:
            # Route all inputs through identity ops that depend on the
            # assertions.
            with ops.control_dependencies(assertions):
                lower_upper = array_ops.identity(lower_upper)
                perm = array_ops.identity(perm)
                rhs = array_ops.identity(rhs)

        if (rhs.shape.rank == 2 and perm.shape.rank == 1):
            # Both rhs and perm have scalar batch_shape.
            permuted_rhs = array_ops.gather(rhs, perm, axis=-2)
        else:
            # Either rhs or perm have non-scalar batch_shape or we can't determine
            # this information statically.
            rhs_shape = array_ops.shape(rhs)
            broadcast_batch_shape = array_ops.broadcast_dynamic_shape(
                rhs_shape[:-2],
                array_ops.shape(perm)[:-1])
            d, m = rhs_shape[-2], rhs_shape[-1]
            rhs_broadcast_shape = array_ops.concat(
                [broadcast_batch_shape, [d, m]], axis=0)

            # Tile out rhs.
            broadcast_rhs = array_ops.broadcast_to(rhs, rhs_broadcast_shape)
            broadcast_rhs = array_ops.reshape(broadcast_rhs, [-1, d, m])

            # Tile out perm and add batch indices.
            broadcast_perm = array_ops.broadcast_to(perm,
                                                    rhs_broadcast_shape[:-1])
            broadcast_perm = array_ops.reshape(broadcast_perm, [-1, d])
            broadcast_batch_size = math_ops.reduce_prod(broadcast_batch_shape)
            broadcast_batch_indices = array_ops.broadcast_to(
                math_ops.range(broadcast_batch_size)[:, array_ops.newaxis],
                [broadcast_batch_size, d])
            broadcast_perm = array_ops.stack(
                [broadcast_batch_indices, broadcast_perm], axis=-1)

            # Gather the permuted rows per batch member, then restore the
            # broadcast shape.
            permuted_rhs = array_ops.gather_nd(broadcast_rhs, broadcast_perm)
            permuted_rhs = array_ops.reshape(permuted_rhs, rhs_broadcast_shape)

        # Lower factor: lower triangle of `lower_upper` with its diagonal set
        # to ones.
        lower = set_diag(
            band_part(lower_upper, num_lower=-1, num_upper=0),
            array_ops.ones(array_ops.shape(lower_upper)[:-1],
                           dtype=lower_upper.dtype))
        # Two triangular solves: first with the lower factor, then with the
        # upper one.
        return triangular_solve(
            lower_upper,  # Only upper is accessed.
            triangular_solve(lower, permuted_rhs),
            lower=False)
def _ones_like(x):
    """Builds ones shaped like `x`, from its static shape when fully known."""
    # Should only be used for small vectors.
    static_shape = x.get_shape()
    if not static_shape.is_fully_defined():
        return array_ops.ones_like(x)
    return array_ops.ones(static_shape.as_list(), dtype=x.dtype)
def _mini_batch_training_op(self, inputs, cluster_idx_list, cluster_centers,
                            total_counts):
    """Creates an op for training for mini batch case.

    Args:
      inputs: list of input Tensors.
      cluster_idx_list: A vector (or list of vectors). Each element in the
        vector corresponds to an input row in 'inp' and specifies the cluster
        id corresponding to the input.
      cluster_centers: Tensor Ref of cluster centers.
      total_counts: Tensor Ref of cluster counts.

    Returns:
      An op for doing an update of mini-batch k-means.
    """
    update_ops = []
    for inp, cluster_idx in zip(inputs, cluster_idx_list):
        with ops.colocate_with(inp, ignore_existing=True):
            assert total_counts is not None
            cluster_idx = array_ops.reshape(cluster_idx, [-1])
            # Dedupe the unique ids of cluster_centers being updated so that updates
            # can be locally aggregated.
            unique_ids, unique_idx = array_ops.unique(cluster_idx)
            num_unique_cluster_idx = array_ops.size(unique_ids)
            # Fetch the old values of counts and cluster_centers.
            with ops.colocate_with(total_counts, ignore_existing=True):
                old_counts = array_ops.gather(total_counts, unique_ids)
            # TODO(agarwal): This colocation seems to run into problems. Fix it.
            with ops.colocate_with(cluster_centers, ignore_existing=True):
                old_cluster_centers = array_ops.gather(
                    cluster_centers, unique_ids)
            # Locally aggregate the increment to counts.
            count_updates = math_ops.unsorted_segment_sum(
                array_ops.ones_like(unique_idx, dtype=total_counts.dtype),
                unique_idx, num_unique_cluster_idx)
            # Locally compute the sum of inputs mapped to each id.
            # For a cluster with old cluster value x, old count n, and with data
            # d_1,...d_k newly assigned to it, we recompute the new value as
            # x += (sum_i(d_i) - k * x) / (n + k).
            # Compute sum_i(d_i), see comment above.
            cluster_center_updates = math_ops.unsorted_segment_sum(
                inp, unique_idx, num_unique_cluster_idx)
            # Shape to enable broadcasting count_updates and learning_rate to inp.
            # It extends the shape with 1's to match the rank of inp.
            broadcast_shape = array_ops.concat([
                array_ops.reshape(num_unique_cluster_idx, [1]),
                array_ops.ones(array_ops.reshape(
                    array_ops.rank(inp) - 1, [1]),
                               dtype=dtypes.int32)
            ], 0)
            # Subtract k * x, see comment above.
            cluster_center_updates -= math_ops.cast(
                array_ops.reshape(count_updates, broadcast_shape),
                inp.dtype) * old_cluster_centers
            learning_rate = math_ops.reciprocal(
                math_ops.cast(old_counts + count_updates, inp.dtype))
            learning_rate = array_ops.reshape(learning_rate, broadcast_shape)
            # scale by 1 / (n + k), see comment above.
            cluster_center_updates *= learning_rate
            # Apply the updates.
            update_counts = state_ops.scatter_add(total_counts, unique_ids,
                                                  count_updates)
            update_cluster_centers = state_ops.scatter_add(
                cluster_centers, unique_ids, cluster_center_updates)
            update_ops.extend([update_counts, update_cluster_centers])
    # Group the per-input update ops into a single op.
    return control_flow_ops.group(*update_ops)
def __init__(self,
             loc=None,
             scale_diag=None,
             scale_identity_multiplier=None,
             skewness=None,
             tailweight=None,
             distribution=None,
             validate_args=False,
             allow_nan_stats=True,
             name="MultivariateNormalLinearOperator"):
    """Construct VectorSinhArcsinhDiag distribution on `R^k`.

    The arguments `scale_diag` and `scale_identity_multiplier` combine to
    define the diagonal `scale` referred to in this class docstring:

    ```none
    scale = diag(scale_diag + scale_identity_multiplier * ones(k))
    ```

    The `batch_shape` is the broadcast shape between `loc` and `scale`
    arguments.

    The `event_shape` is given by last dimension of the matrix implied by
    `scale`. The last dimension of `loc` (if provided) must broadcast with
    this.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]`
        where `b >= 0` and `k` is the event size.
      scale_diag: Non-zero, floating-point `Tensor` representing a diagonal
        matrix added to `scale`. May have shape `[B1, ..., Bb, k]`, `b >= 0`,
        and characterizes `b`-batches of `k x k` diagonal matrices added to
        `scale`. When both `scale_identity_multiplier` and `scale_diag` are
        `None` then `scale` is the `Identity`.
      scale_identity_multiplier: Non-zero, floating-point `Tensor` representing
        a scale-identity-matrix added to `scale`. May have shape
        `[B1, ..., Bb]`, `b >= 0`, and characterizes `b`-batches of scale
        `k x k` identity matrices added to `scale`. When both
        `scale_identity_multiplier` and `scale_diag` are `None` then `scale`
        is the `Identity`.
      skewness: Skewness parameter. floating-point `Tensor` with shape
        broadcastable with `event_shape`.
      tailweight: Tailweight parameter. floating-point `Tensor` with shape
        broadcastable with `event_shape`.
      distribution: `tf.Distribution`-like instance. Distribution from which
        `k` iid samples are used as input to transformation `F`. Default is
        `tf.distributions.Normal(loc=0., scale=1.)`.
        Must be a scalar-batch, scalar-event distribution. Typically
        `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it
        is a function of non-trainable parameters. WARNING: If you backprop
        through a VectorSinhArcsinhDiag sample and `distribution` is not
        `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then
        the gradient will be incorrect!
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if at most `scale_identity_multiplier` is specified.
    """
    # Snapshot the constructor arguments. On CPython < 3.13 `locals()` returns
    # the frame's live dict, which is refreshed by any later `locals()` call
    # or tracer access; since `loc`, `skewness`, `tailweight` and
    # `distribution` are rebound below, the stored parameters could then stop
    # reflecting what the caller passed. Copying it keeps them stable.
    parameters = dict(locals())

    # NOTE(review): the default `name` is "MultivariateNormalLinearOperator",
    # which looks copied from another class. It is left unchanged because it
    # is part of the public signature -- confirm whether it is intentional.
    with ops.name_scope(name,
                        values=[
                            loc, scale_diag, scale_identity_multiplier,
                            skewness, tailweight
                        ]):
        loc = ops.convert_to_tensor(loc, name="loc") if loc is not None else loc
        tailweight = 1. if tailweight is None else tailweight
        # Remember whether the caller left skewness unset before it is
        # replaced by its default; this decides below whether `f` can be
        # reused as the zero-skew bijector.
        has_default_skewness = skewness is None
        skewness = 0. if skewness is None else skewness

        # Recall, with Z a random variable,
        #   Y := loc + C * F(Z),
        #   F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight )
        #   F_0(Z) := Sinh( Arcsinh(Z) * tailweight )
        #   C := 2 * scale / F_0(2)

        # Construct shapes and 'scale' out of the scale_* and loc kwargs.
        # scale_linop is only an intermediary to:
        #  1. get shapes from looking at loc and the two scale args.
        #  2. combine scale_diag with scale_identity_multiplier, which gives us
        #     'scale', which in turn gives us 'C'.
        scale_linop = distribution_util.make_diag_scale(
            loc=loc,
            scale_diag=scale_diag,
            scale_identity_multiplier=scale_identity_multiplier,
            validate_args=False,
            assert_positive=False)
        batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
            loc, scale_linop)
        # scale_linop.diag_part() is efficient since it is a diag type linop.
        scale_diag_part = scale_linop.diag_part()
        dtype = scale_diag_part.dtype

        if distribution is None:
            distribution = normal.Normal(loc=array_ops.zeros([], dtype=dtype),
                                         scale=array_ops.ones([], dtype=dtype),
                                         allow_nan_stats=allow_nan_stats)
        else:
            asserts = distribution_util.maybe_check_scalar_distribution(
                distribution, dtype, validate_args)
            if asserts:
                # Attach the checks to a tensor that is used below.
                scale_diag_part = control_flow_ops.with_dependencies(
                    asserts, scale_diag_part)

        # Make the SAS bijector, 'F'.
        skewness = ops.convert_to_tensor(skewness, dtype=dtype, name="skewness")
        tailweight = ops.convert_to_tensor(tailweight,
                                           dtype=dtype,
                                           name="tailweight")
        f = bijectors.SinhArcsinh(skewness=skewness,
                                  tailweight=tailweight,
                                  event_ndims=1)
        if has_default_skewness:
            f_noskew = f
        else:
            f_noskew = bijectors.SinhArcsinh(
                skewness=skewness.dtype.as_numpy_dtype(0.),
                tailweight=tailweight,
                event_ndims=0)

        # Make the Affine bijector, Z --> loc + C * Z.
        c = 2 * scale_diag_part / f_noskew.forward(
            ops.convert_to_tensor(2, dtype=dtype))
        affine = bijectors.Affine(shift=loc,
                                  scale_diag=c,
                                  validate_args=validate_args,
                                  event_ndims=1)

        bijector = bijectors.Chain([affine, f])

        super(VectorSinhArcsinhDiag, self).__init__(
            distribution=distribution,
            bijector=bijector,
            batch_shape=batch_shape,
            event_shape=event_shape,
            validate_args=validate_args,
            name=name)
    self._parameters = parameters
    self._loc = loc
    self._scale = scale_linop
    self._tailweight = tailweight
    self._skewness = skewness
def _determinant(self):
    """Returns ones of shape `batch_shape_tensor()` in this object's dtype."""
    batch_shape = self.batch_shape_tensor()
    return array_ops.ones(shape=batch_shape, dtype=self.dtype)
def initializer():
    """Returns the initial weights selected by the enclosing `init_mode`."""
    if init_mode != "scalar":
        # Non-scalar mode: take the `i`-th entry of `wt_init`.
        return wt_init[i]
    # Scalar mode: scale a length-`size` vector of ones by `wt_init`.
    return wt_init * array_ops.ones([size])
def update_metrics(self,
                   eval_metrics,
                   features,
                   logits,
                   labels,
                   regularization_losses=None):
    """Updates eval metrics. See `base_head.Head` for details.

    Args:
      eval_metrics: Mapping from metric key to metric object; each object used
        here is updated through `update_state` (or, for the per-class
        probability metrics, through
        `base_head.update_metric_with_broadcast_weights`).
      features: Passed to `_unweighted_loss_and_weights` together with the
        logits and processed labels.
      logits: Logits; checked against `self.logits_dimension` first.
      labels: Raw labels; processed by `self._processed_labels`.
      regularization_losses: Optional list of losses. When not `None`, their
        sum is recorded under the regularization-loss metric.

    Returns:
      The same `eval_metrics` mapping that was passed in.
    """
    logits = base_head.check_logits_final_dim(logits, self.logits_dimension)
    processed_labels = self._processed_labels(logits, labels)
    unweighted_loss, weights = self._unweighted_loss_and_weights(
        logits, processed_labels, features)
    prob_key = prediction_keys.PredictionKeys.PROBABILITIES
    predictions = self.predictions(logits, [prob_key])
    probabilities = predictions[prob_key]

    # Update metrics.
    eval_metrics[self._loss_mean_key].update_state(values=unweighted_loss,
                                                   sample_weight=weights)
    eval_metrics[self._auc_key].update_state(y_true=processed_labels,
                                             y_pred=probabilities,
                                             sample_weight=weights)
    eval_metrics[self._auc_pr_key].update_state(y_true=processed_labels,
                                                y_pred=probabilities,
                                                sample_weight=weights)
    if regularization_losses is not None:
        regularization_loss = math_ops.add_n(regularization_losses)
        eval_metrics[self._loss_regularization_key].update_state(
            values=regularization_loss)
    # Every threshold gets the same labels and probabilities; the index only
    # selects which metric object to update.
    for i in range(len(self._thresholds)):
        eval_metrics[self._accuracy_keys[i]].update_state(
            y_true=processed_labels,
            y_pred=probabilities,
            sample_weight=weights)
        eval_metrics[self._precision_keys[i]].update_state(
            y_true=processed_labels,
            y_pred=probabilities,
            sample_weight=weights)
        eval_metrics[self._recall_keys[i]].update_state(
            y_true=processed_labels,
            y_pred=probabilities,
            sample_weight=weights)
    for i, class_id in enumerate(self._classes_for_class_based_metrics):
        # Slice out column `class_id` of the last axis: start at 0 and take
        # everything (-1) along each leading axis, and take a single element
        # at `class_id` along the last one.
        batch_rank = array_ops.rank(probabilities) - 1
        begin = array_ops.concat([
            array_ops.zeros([batch_rank], dtype=dtypes.int32), [class_id]
        ],
                                 axis=0)
        size = array_ops.concat(
            [-1 * array_ops.ones([batch_rank], dtype=dtypes.int32), [1]],
            axis=0)
        class_probabilities = array_ops.slice(probabilities,
                                              begin=begin,
                                              size=size)
        class_labels = array_ops.slice(processed_labels,
                                       begin=begin,
                                       size=size)
        base_head.update_metric_with_broadcast_weights(
            eval_metrics[self._prob_keys[i]], class_probabilities, weights)
        eval_metrics[self._auc_keys[i]].update_state(
            y_true=class_labels,
            y_pred=class_probabilities,
            sample_weight=weights)
        eval_metrics[self._auc_pr_keys[i]].update_state(
            y_true=class_labels,
            y_pred=class_probabilities,
            sample_weight=weights)
    return eval_metrics