def testGroup_MultiDevice(self):
  """Groups ops living on two devices and checks the resulting graph.

  The expected graph holds one intermediate NoOp per input device and a
  final "root" NoOp, placed on "/task:2", that depends on both of them.
  """
  # Adjacent literals rebuild the expected text proto as one string.
  expected_graph = (
      ' node { name: "a" op: "Const" device: "/task:0"}'
      ' node { name: "b" op: "Const" device: "/task:0"}'
      ' node { name: "c" op: "Const" device: "/task:1"}'
      ' node { name: "d" op: "Const" device: "/task:1"}'
      ' node { name: "root/NoOp" op: "NoOp" input: "^a" input: "^b"'
      ' device: "/task:0" }'
      ' node { name: "root/NoOp_1" op: "NoOp" input: "^c" input: "^d"'
      ' device: "/task:1" }'
      ' node { name: "root" op: "NoOp" input: "^root/NoOp"'
      ' input: "^root/NoOp_1" device: "/task:2" }'
      ' ')
  with ops.Graph().as_default() as graph:
    with graph.device("/task:0"):
      a = constant_op.constant(0, name="a")
      b = constant_op.constant(0, name="b")
    with graph.device("/task:1"):
      c = constant_op.constant(0, name="c")
      d = constant_op.constant(0, name="d")
    with graph.device("/task:2"):
      control_flow_ops.group(a.op, b.op, c.op, d.op, name="root")
  graph_def = graph.as_graph_def()
  self.assertProtoEquals(expected_graph, self._StripGraph(graph_def))
def benchmarkMatrixInverseOp(self):
  """Benchmarks `matrix_inverse` for every adjoint flag and shape.

  Each configuration is run on the CPU and, when `test.is_gpu_available`
  reports a GPU, on "/gpu:0" as well.
  """
  targets = (
      ("/cpu:0", False, "matrix_inverse_cpu_{shape}_adjoint_{adjoint}"),
      ("/gpu:0", True, "matrix_inverse_gpu_{shape}_adjoint_{adjoint}"),
  )
  for adjoint in (False, True):
    for shape in self.shapes:
      for device, needs_gpu, name_template in targets:
        # The GPU probe runs only after the CPU benchmark of this config.
        if needs_gpu and not test.is_gpu_available(True):
          continue
        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device(device):
          matrix = self._GenerateMatrix(shape)
          inverse = linalg_ops.matrix_inverse(matrix, adjoint=adjoint)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(inverse),
              min_iters=25,
              name=name_template.format(shape=shape, adjoint=adjoint))
def benchmarkMatrixSolveLsOp(self):
  """Benchmarks `matrix_solve_ls` over matrix shapes and right-hand sides.

  For each shape in `self.matrix_shapes` the solver is timed with 1, 2 and
  `matrix_shape[-1]` right-hand sides. The GPU run is skipped when no GPU
  is available, and also for shapes of rank 3 or more whose leading
  dimension is 513 or larger.
  """
  run_gpu_test = test_lib.is_gpu_available(True)
  regularizer = 1.0
  targets = (
      ("/cpu:0", False,
       "matrix_solve_ls_cpu_shape_{matrix_shape}_num_rhs_{num_rhs}"),
      ("/gpu:0", True,
       "matrix_solve_ls_gpu_shape_{matrix_shape}_num_rhs_{num_rhs}"),
  )
  for matrix_shape in self.matrix_shapes:
    for num_rhs in (1, 2, matrix_shape[-1]):
      for device, is_gpu, name_template in targets:
        if is_gpu and not (
            run_gpu_test and
            (len(matrix_shape) < 3 or matrix_shape[0] < 513)):
          continue
        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device(device):
          matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
          solution = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(solution),
              min_iters=25,
              store_memory_usage=False,
              name=name_template.format(
                  matrix_shape=matrix_shape, num_rhs=num_rhs))
def benchmarkMatrixBandPartOp(self):
  """Benchmarks `matrix_band_part` on an all-ones variable.

  Every shape in `self.shapes` is combined with four (lower, upper) band
  limits. The GPU variant runs only when `test_lib.is_gpu_available`
  reports a GPU.
  """
  band_limits = ((-1, -1), (-1, 0), (0, -1), (2, 2))
  targets = (
      ("/cpu:0", False, "matrix_band_part_cpu_{shape}_{limits}"),
      ("/gpu:0", True, "matrix_band_part_gpu_{shape}_{limits}"),
  )
  for shape_ in self.shapes:
    for limits in band_limits:
      for device, needs_gpu, name_template in targets:
        if needs_gpu and not test_lib.is_gpu_available(True):
          continue
        with ops.Graph().as_default(), \
            session.Session() as sess, \
            ops.device(device):
          matrix = variables.Variable(array_ops.ones(shape_))
          band = array_ops.matrix_band_part(matrix, limits[0], limits[1])
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(band),
              min_iters=10,
              name=name_template.format(shape=shape_, limits=limits))
def benchmarkQROp(self):
  """Benchmarks `linalg_ops.qr` on random float32 matrices.

  A fresh uniform(-1, 1) matrix is drawn for every run. The GPU variant
  runs on "/device:GPU:0" only when a GPU is available.
  """
  targets = (
      ("/cpu:0", False, "QR_cpu_{shape}"),
      ("/device:GPU:0", True, "QR_gpu_{shape}"),
  )
  for shape_ in self.shapes:
    for device, needs_gpu, name_template in targets:
      if needs_gpu and not test.is_gpu_available(True):
        continue
      with ops.Graph().as_default(), \
          session.Session(config=benchmark.benchmark_config()) as sess, \
          ops.device(device):
        samples = np.random.uniform(low=-1.0, high=1.0, size=shape_)
        matrix = variables.Variable(samples.astype(np.float32))
        q, r = linalg_ops.qr(matrix)
        variables.global_variables_initializer().run()
        self.run_op_benchmark(
            sess,
            control_flow_ops.group(q, r),
            min_iters=25,
            name=name_template.format(shape=shape_))
def benchmarkCholeskyOp(self):
  """Benchmarks `linalg_ops.cholesky` for every shape in `self.shapes`.

  The input comes from `self._GenerateMatrix` and is wrapped in a variable.
  The GPU variant runs on "/device:GPU:0" only when a GPU is available.
  """
  targets = (
      ("/cpu:0", False, "cholesky_cpu_{shape}"),
      ("/device:GPU:0", True, "cholesky_gpu_{shape}"),
  )
  for shape in self.shapes:
    for device, needs_gpu, name_template in targets:
      if needs_gpu and not test.is_gpu_available(True):
        continue
      with ops.Graph().as_default(), \
          session.Session() as sess, \
          ops.device(device):
        matrix = variables.Variable(self._GenerateMatrix(shape))
        factor = linalg_ops.cholesky(matrix)
        variables.global_variables_initializer().run()
        self.run_op_benchmark(
            sess,
            control_flow_ops.group(factor),
            min_iters=25,
            name=name_template.format(shape=shape))
def benchmarkMatrixExponentialOp(self):
  """Benchmarks `linalg_impl.matrix_exponential` per shape in `self.shapes`.

  Runs on the CPU and, when a GPU is available, on "/gpu:0".
  """
  targets = (
      ("/cpu:0", False, "matrix_exponential_cpu_{shape}"),
      ("/gpu:0", True, "matrix_exponential_gpu_{shape}"),
  )
  for shape in self.shapes:
    for device, needs_gpu, name_template in targets:
      if needs_gpu and not test.is_gpu_available(True):
        continue
      with ops.Graph().as_default(), \
          session.Session() as sess, \
          ops.device(device):
        matrix = self._GenerateMatrix(shape)
        exponential = linalg_impl.matrix_exponential(matrix)
        variables.global_variables_initializer().run()
        self.run_op_benchmark(
            sess,
            control_flow_ops.group(exponential),
            min_iters=25,
            name=name_template.format(shape=shape))
def benchmarkTridiagonalMulOp(self):
  """Benchmarks `tridiagonal_matmul` against `self.baseline`.

  Every (device, size) pair is timed twice: once for the baseline
  implementation and once for `linalg_impl.tridiagonal_matmul`. The GPU is
  included only when a CUDA GPU is available.
  """
  devices = [('/cpu:0', 'cpu')]
  if test.is_gpu_available(cuda_only=True):
    devices.append(('/gpu:0', 'gpu'))

  for (device_id, device_name), (m, batch_size, n) in itertools.product(
      devices, self.sizes):
    name_args = (device_name, batch_size, m, n)
    with ops.Graph().as_default(), \
        session.Session(config=benchmark.benchmark_config()) as sess, \
        ops.device(device_id):
      upper, diag, lower, vec = self._generateData(batch_size, m, n)
      x1 = self.baseline(upper, diag, lower, vec)
      x2 = linalg_impl.tridiagonal_matmul(
          (upper, diag, lower), vec, diagonals_format='sequence')
      variables.global_variables_initializer().run()
      runs = (
          (x1, 'tridiagonal_matmul_baseline_%s_batch_size_%d_m_%d_n_%d'
           % name_args),
          (x2, 'tridiagonal_matmul_%s_batch_size_%d_m_%d_n_%d' % name_args),
      )
      for result, run_name in runs:
        # Each group op is built right before its own benchmark run.
        self.run_op_benchmark(
            sess,
            control_flow_ops.group(result),
            min_iters=10,
            store_memory_usage=False,
            name=run_name)
def benchmarkMatrixInverseOp(self):
  """Benchmarks `matrix_inverse` on data from `self._GenerateData`.

  The data for a size is generated once and reused for the CPU run and,
  when a GPU is available, the GPU run. Benchmark names carry "adjoint" or
  "noadjoint" depending on the flag.
  """
  targets = (
      ("/cpu:0", False, "matrix_inverse_cpu_{size}_{adjoint}"),
      ("/gpu:0", True, "matrix_inverse_gpu_{size}_{adjoint}"),
  )
  for adjoint in (False, True):
    for size in self.sizes:
      data = self._GenerateData(size)
      for device, needs_gpu, name_template in targets:
        if needs_gpu and not test.is_gpu_available(True):
          continue
        with ops.Graph().as_default(), \
            session.Session() as sess, \
            ops.device(device):
          inverse = linalg_ops.matrix_inverse(data, adjoint=adjoint)
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(inverse),
              min_iters=25,
              name=name_template.format(
                  size=size,
                  adjoint="adjoint" if adjoint else "noadjoint"))
def benchmarkMatrixDeterminantOp(self):
  """Benchmarks `matrix_determinant` for every shape in `self.shapes`.

  Runs on the CPU and, when a GPU is available, on "/gpu:0".
  """
  targets = (
      ("/cpu:0", False, "matrix_determinant_cpu_{shape}"),
      ("/gpu:0", True, "matrix_determinant_gpu_{shape}"),
  )
  for shape in self.shapes:
    for device, needs_gpu, name_template in targets:
      if needs_gpu and not test.is_gpu_available(True):
        continue
      with ops.Graph().as_default(), \
          session.Session(config=benchmark.benchmark_config()) as sess, \
          ops.device(device):
        matrix = self._GenerateMatrix(shape)
        determinant = linalg_ops.matrix_determinant(matrix)
        variables.global_variables_initializer().run()
        self.run_op_benchmark(
            sess,
            control_flow_ops.group(determinant),
            min_iters=25,
            name=name_template.format(shape=shape))
def make_ops_and_vars_round_robin(self, scope=None, cov_devices=None,
                                  inv_devices=None):
  """Make ops and vars with a round-robin device placement strategy.

  Variable creation and thunk construction are delegated to
  `make_vars_and_create_op_thunks_round_robin`; this method then calls
  every thunk and groups the resulting ops.

  For each factor, all of that factor's cov variables and their associated
  update ops are placed on one device, chosen by cycling through
  `cov_devices`. If `cov_devices` is None no explicit placement occurs. The
  same strategy applies to inverse update ops with `inv_devices`.

  Inverse variables are not placed on any specific device: they use the
  current device placement context, which the user is responsible for
  setting. The idea is that they belong on the device that applies the
  preconditioner to the gradient, where they are accessed most often.

  Args:
    scope: A string or None. If None, the name of this estimator (the
      `name` property) is used. The grouped ops are created inside a
      variable scope of this name. (Default: None)
    cov_devices: Iterable of device strings (e.g. '/gpu:0') used
      round-robin for covariance computations, or None for no placement.
    inv_devices: Iterable of device strings (e.g. '/gpu:0') used
      round-robin for inversion computations, or None for no placement.

  Returns:
    cov_update_ops: List of ops that compute the cov updates, one per
      factor in the "factors" property.
    cov_update_op: cov_update_ops grouped into a single op.
    inv_update_ops: List of ops that compute the inv updates, one per
      factor in the "factors" property.
    inv_update_op: inv_update_ops grouped into a single op.
    cov_update_thunks: Thunks that make the ops in cov_update_ops.
    inv_update_thunks: Thunks that make the ops in inv_update_ops.
  """
  thunk_pair = self.make_vars_and_create_op_thunks_round_robin(
      scope=scope, cov_devices=cov_devices, inv_devices=inv_devices)
  cov_update_thunks, inv_update_thunks = thunk_pair

  # All cov ops are built before any inv op.
  cov_update_ops = [make_op() for make_op in cov_update_thunks]
  inv_update_ops = [make_op() for make_op in inv_update_thunks]

  if scope is None:
    scope = self.name
  with variable_scope.variable_scope(scope):
    cov_update_op = control_flow_ops.group(
        cov_update_ops, name="cov_update_op")
    inv_update_op = control_flow_ops.group(
        inv_update_ops, name="inv_update_op")

  return (cov_update_ops, cov_update_op, inv_update_ops, inv_update_op,
          cov_update_thunks, inv_update_thunks)
def _create_transient_vars(self):
  """Creates local caches of factors, weights and gramians.

  Caching strategy when a row (resp. column) update is initiated:
    - The column (resp. row) gramian is computed.
    - If `use_gramian_cache` is True, the column (resp. row) gramian is
      cached and the row (resp. column) gramian cache is reset.
    - If `use_factors_weights_cache` is True, the column (resp. row)
      factors and weights are cached and the row (resp. column) ones reset.

  Sets `_row_updates_init`, `_col_updates_init` and `_worker_init`, plus
  the six `_*_cache` attributes returned by `_cached_copy`.
  """
  # Factor and weight caches share one pass-through switch.
  bypass_factor_cache = not self._use_factors_weights_cache
  (self._row_factors_cache, row_factors_init,
   row_factors_reset) = self._cached_copy(
       self._row_factors, "row_factors_cache",
       pass_through=bypass_factor_cache)
  (self._col_factors_cache, col_factors_init,
   col_factors_reset) = self._cached_copy(
       self._col_factors, "col_factors_cache",
       pass_through=bypass_factor_cache)
  (self._row_wt_cache, row_wt_init, _) = self._cached_copy(
      self._row_weights, "row_wt_cache", pass_through=bypass_factor_cache)
  (self._col_wt_cache, col_wt_init, _) = self._cached_copy(
      self._col_weights, "col_wt_cache", pass_through=bypass_factor_cache)

  bypass_gramian_cache = not self._use_gramian_cache
  (self._row_gramian_cache, row_gramian_init,
   row_gramian_reset) = self._cached_copy(
       self._row_gramian, "row_gramian_cache",
       pass_through=bypass_gramian_cache)
  (self._col_gramian_cache, col_gramian_init,
   col_gramian_reset) = self._cached_copy(
       self._col_gramian, "col_gramian_cache",
       pass_through=bypass_gramian_cache)

  # A row update fills the column caches and resets the row caches; a
  # column update does the opposite.
  self._row_updates_init = control_flow_ops.group(
      col_factors_init, row_factors_reset,
      col_gramian_init, row_gramian_reset)
  self._col_updates_init = control_flow_ops.group(
      row_factors_init, col_factors_reset,
      row_gramian_init, col_gramian_reset)

  if self._row_wt_cache is None:
    self._worker_init = control_flow_ops.no_op(name="worker_init")
  else:
    assert self._col_wt_cache is not None
    self._worker_init = control_flow_ops.group(
        row_wt_init, col_wt_init, name="worker_init")
def head_ops(self, features, target, mode, train_op_fn, logits=None,
             logits_input=None):
  """Returns ops for a model_fn.

  Args:
    features: input dict.
    target: target dict or tensor.
    mode: estimator's ModeKeys.
    train_op_fn: function that takes a scalar loss and returns an op to
      optimize with the loss.
    logits: logits to be used for the head.
    logits_input: tensor to build logits from.

  Returns:
    `estimator.ModelFnOps` for the requested mode.

  Raises:
    ValueError: if mode is not recognized.
  """
  _check_logits_input_not_supported(logits, logits_input)

  if mode == estimator.ModeKeys.TRAIN:
    loss, additional_train_op = self._training_loss(
        features, target, logits, logits_input)
    train_op = train_op_fn(loss)
    if additional_train_op:
      # Put the main train op first, but only if there is one.
      grouped = [train_op] if train_op else []
      grouped.extend(additional_train_op)
      train_op = control_flow_ops.group(*grouped)
    return estimator.ModelFnOps(
        mode=estimator.ModeKeys.TRAIN,
        loss=loss,
        training_op=train_op,
        default_metrics=self._default_metric(),
        signature_fn=self._create_signature_fn())
  elif mode == estimator.ModeKeys.INFER:
    return estimator.ModelFnOps(
        mode=estimator.ModeKeys.INFER,
        predictions=self._infer_op(logits, logits_input),
        default_metrics=self._default_metric(),
        signature_fn=self._create_signature_fn())
  elif mode == estimator.ModeKeys.EVAL:
    predictions, loss = self._eval_op(
        features, target, logits, logits_input)
    return estimator.ModelFnOps(
        mode=estimator.ModeKeys.EVAL,
        predictions=predictions,
        loss=loss,
        default_metrics=self._default_metric(),
        signature_fn=self._create_signature_fn())

  raise ValueError("mode=%s unrecognized." % str(mode))
def _resource_apply_sparse(self, grad, var, indices):
  """Applies a sparse RMSprop update to the resource variable `var`.

  With momentum enabled the update is delegated to a fused training op
  (centered or plain). Otherwise the "rms" slot, and the "mg" slot in
  centered mode, are decayed and scatter-updated by hand before `var` is
  scatter-updated.

  Args:
    grad: Gradient values for the rows selected by `indices`.
    var: The resource variable to update.
    indices: Indices of the rows of `var` that `grad` refers to.

  Returns:
    The fused training op, or a grouped op over the manual updates.
  """
  dtype = var.dtype.base_dtype
  lr_t = self._decayed_lr(dtype)
  rms = self.get_slot(var, "rms")
  rho = self._get_hyper("rho", dtype)
  momentum = self._get_hyper("momentum", dtype)
  epsilon = self._get_hyper("epsilon", dtype)

  if self._momentum:
    mom = self.get_slot(var, "momentum")
    if not self.centered:
      return training_ops.resource_sparse_apply_rms_prop(
          var.handle, rms.handle, mom.handle, lr_t, rho, momentum, epsilon,
          grad, indices, use_locking=self._use_locking)
    mg = self.get_slot(var, "mg")
    return training_ops.resource_sparse_apply_centered_rms_prop(
        var.handle, mg.handle, rms.handle, mom.handle, lr_t, rho, momentum,
        epsilon, grad, indices, use_locking=self._use_locking)

  # No momentum: decay the whole "rms" slot, then add the scaled squared
  # gradient to the touched rows.
  rms_increment = (grad * grad) * (1. - rho)
  rms_t = state_ops.assign(rms, rms * rho, use_locking=self._use_locking)
  with ops.control_dependencies([rms_t]):
    rms_t = self._resource_scatter_add(rms, indices, rms_increment)
    rms_rows = array_ops.gather(rms_t, indices)
  denominator = rms_rows

  if self.centered:
    mg = self.get_slot(var, "mg")
    mg_increment = grad * (1. - rho)
    mg_t = state_ops.assign(mg, mg * rho, use_locking=self._use_locking)
    with ops.control_dependencies([mg_t]):
      mg_t = self._resource_scatter_add(mg, indices, mg_increment)
      mg_rows = array_ops.gather(mg_t, indices)
      denominator = rms_rows - math_ops.square(mg_rows)

  var_update = self._resource_scatter_add(
      var, indices, -lr_t * grad / (math_ops.sqrt(denominator) + epsilon))

  if self.centered:
    return control_flow_ops.group(var_update, rms_t, mg_t)
  return control_flow_ops.group(var_update, rms_t)
def testPassingList(self):
  """Checks that group() accepts its inputs as a single list."""
  expected_graph = (
      ' node { name: "a" op: "Const"}'
      ' node { name: "b" op: "Const"}'
      ' node { name: "root" op: "NoOp" input: "^a" input: "^b" }'
      ' ')
  with ops.Graph().as_default() as graph:
    a = constant_op.constant(0, name="a")
    b = constant_op.constant(0, name="b")
    control_flow_ops.group([a.op, b.op], name="root")
  graph_def = graph.as_graph_def()
  self.assertProtoEquals(expected_graph, self._StripGraph(graph_def))
def testGroup_OneDevice(self):
  """Checks group() over ops on one device.

  The expected graph places the "root" NoOp on the same "/task:0" device
  as its two inputs.
  """
  expected_graph = (
      ' node { name: "a" op: "Const" device: "/task:0" }'
      ' node { name: "b" op: "Const" device: "/task:0" }'
      ' node { name: "root" op: "NoOp" input: "^a" input: "^b"'
      ' device: "/task:0" }'
      ' ')
  with ops.Graph().as_default() as graph:
    with graph.device("/task:0"):
      a = constant_op.constant(0, name="a")
      b = constant_op.constant(0, name="b")
    control_flow_ops.group(a.op, b.op, name="root")
  graph_def = graph.as_graph_def()
  self.assertProtoEquals(expected_graph, self._StripGraph(graph_def))
def testGroup_NoDevices(self):
  """Checks group() over ops with no device: one NoOp, no device field."""
  expected_graph = (
      ' node { name: "a" op: "Const"}'
      ' node { name: "b" op: "Const"}'
      ' node { name: "c" op: "Const"}'
      ' node { name: "root" op: "NoOp" input: "^a" input: "^b"'
      ' input: "^c" }'
      ' ')
  with ops.Graph().as_default() as graph:
    a = constant_op.constant(0, name="a")
    b = constant_op.constant(0, name="b")
    c = constant_op.constant(0, name="c")
    control_flow_ops.group(a.op, b.op, c.op, name="root")
  graph_def = graph.as_graph_def()
  self.assertProtoEquals(expected_graph, self._StripGraph(graph_def))
def randn_sampler_switchover(shape, num_iters, use_gpu=False):
  """Times the truncated-normal sampler on both sides of a threshold.

  Two samplers are built whose lower bound sits just above and just below
  the switchover point to randn rejection sampling. The two should perform
  about the same there, which checks the threshold is set correctly.

  Args:
    shape: Shape of the sample tensor to draw.
    num_iters: Number of timed `sess.run` calls per sampler.
    use_gpu: If true, place the samplers on "/gpu:0" instead of "/cpu:0".

  Returns:
    A `(randn_dt, uniform_dt)` tuple of total `timeit` times.
  """
  threshold = _get_stddev_inside_bounds_before_using_randn(use_gpu)
  epsilon = 0.001

  np.random.seed(1618)  # Make it reproducible.

  # No CSE/CF.
  optimizer_options = config_pb2.OptimizerOptions(
      opt_level=config_pb2.OptimizerOptions.L0)
  config = config_pb2.ConfigProto(
      graph_options=config_pb2.GraphOptions(
          optimizer_options=optimizer_options))

  with session.Session(config=config) as sess:

    def _timed(sampler_op):
      # Burn-in to avoid session setup costs in the timing.
      sess.run(sampler_op)
      sess.run(sampler_op)
      return timeit.timeit(lambda: sess.run(sampler_op), number=num_iters)

    with ops.device("/gpu:0" if use_gpu else "/cpu:0"):
      uniform_sampler_op = control_flow_ops.group(
          random_ops.parameterized_truncated_normal(
              shape,
              means=0.,
              stddevs=1.0,
              minvals=-threshold + epsilon,
              maxvals=0.01))
      randn_sampler_op = control_flow_ops.group(
          random_ops.parameterized_truncated_normal(
              shape,
              means=0.,
              stddevs=1.0,
              minvals=-threshold - epsilon,
              maxvals=0.01))

    # The uniform sampler is timed first, then the randn sampler.
    uniform_dt = _timed(uniform_sampler_op)
    randn_dt = _timed(randn_sampler_op)

    return randn_dt, uniform_dt
def build_graph(device, dtype, data_format, input_shape, filter_shape,
                strides, padding, num_iters, warmup_iters):
  """Builds a graph containing two sequences of conv2d operations.

  Each sequence is serialized: every conv2d after the first carries a
  control dependency on the previous one.

  Args:
    device: String, the device to run on.
    dtype: Data type for the convolution.
    data_format: A string from: "NHWC" or "NCHW". Data format for input and
      output data.
    input_shape: Shape of the input tensor.
    filter_shape: Shape of the filter tensor.
    strides: A list of ints. 1-D of length 4. The stride of sliding window
      for each dimension of input.
    padding: A string from: "SAME", "VALID". The type of padding algorithm
      to use.
    num_iters: number of iterations to run conv2d.
    warmup_iters: number of iterations for warmup runs.

  Returns:
    A `(warmup_op, benchmark_op)` tuple of grouped ops to run().
  """
  with ops.device("/%s:0" % device):
    inp = variables.Variable(
        random_ops.truncated_normal(input_shape, dtype=dtype))
    filt = variables.Variable(
        random_ops.truncated_normal(filter_shape, dtype=dtype))

    def _conv_chain(length):
      """Returns a list of conv2d ops, each depending on the previous."""
      conv = nn_ops.conv2d(
          inp, filt, strides, padding, data_format=data_format)
      chain = [conv]
      for _ in range(1, length):
        with ops.control_dependencies([conv]):
          conv = nn_ops.conv2d(
              inp, filt, strides, padding, data_format=data_format)
        chain.append(conv)
      return chain

    # The measured chain is built first, then the warmup chain.
    outputs = _conv_chain(num_iters)
    warmup_groups = _conv_chain(warmup_iters)

    warmup_op = control_flow_ops.group(*warmup_groups)
    benchmark_op = control_flow_ops.group(*outputs)
    return warmup_op, benchmark_op
def restore(self, restored_tensors, restored_shapes):
  """Builds the op that assigns restored tensors to the variables.

  The restored tensors are split into a weights prefix and a biases suffix.
  The split point depends on the RNN direction and mode: one quarter for a
  unidirectional LSTM, three eighths for a unidirectional GRU, one half
  otherwise.

  Args:
    restored_tensors: List of restored tensors, weights followed by biases.
    restored_shapes: Unused by this implementation.

  Returns:
    A grouped op over one assign op per entry of `self._variables`.

  Raises:
    ValueError: If the tensor count is not a multiple of 4 (unidirectional
      LSTM) or of 8 (unidirectional GRU).
  """
  rnn = self._cudnn_rnn
  total = len(restored_tensors)

  if rnn.direction == CUDNN_RNN_UNIDIRECTION and rnn.rnn_mode == CUDNN_LSTM:
    if total % 4:
      raise ValueError(
          "Invalid count of restored_tensors, expecting a multiple of 4.")
    split = total // 4
  elif rnn.direction == CUDNN_RNN_UNIDIRECTION and rnn.rnn_mode == CUDNN_GRU:
    if total % 8:
      raise ValueError(
          "Invalid count of restored_tensors, expecting a multiple of 8.")
    split = total // 8 * 3
  else:
    split = total // 2

  weights, biases = self._untransform_canonical(
      restored_tensors[:split], restored_tensors[split:])
  params = self._canonical_to_params(weights, biases)
  if not isinstance(params, tuple):
    params = (params,)

  assign_ops = []
  for variable, param in zip(self._variables, params):
    assign_ops.append(
        state_ops.assign(variable, param, validate_shape=False))
  return control_flow_ops.group(*assign_ops)
def _apply_sparse_shared(self, grad, var, indices, scatter_add):
  """Applies a sparse Adam-style update to `var`.

  Both the "m" and "v" slots are decayed in full and then updated at
  `indices` through `scatter_add`, before `var` is updated densely.

  Args:
    grad: Gradient values for the rows selected by `indices`.
    var: The variable to update.
    indices: Indices of the rows that `grad` refers to.
    scatter_add: Callable `(slot, indices, values)` that adds `values`
      into `slot` at `indices`.

  Returns:
    A grouped op over the variable update and both slot updates.
  """
  dtype = var.dtype.base_dtype
  beta1_power = math_ops.cast(self._beta1_power, dtype)
  beta2_power = math_ops.cast(self._beta2_power, dtype)
  lr_t = math_ops.cast(self._lr_t, dtype)
  beta1_t = math_ops.cast(self._beta1_t, dtype)
  beta2_t = math_ops.cast(self._beta2_t, dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, dtype)
  lr = lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)

  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_increment = grad * (1 - beta1_t)
  m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
  with ops.control_dependencies([m_t]):
    m_t = scatter_add(m, indices, m_increment)

  # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_increment = (grad * grad) * (1 - beta2_t)
  v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
  with ops.control_dependencies([v_t]):
    v_t = scatter_add(v, indices, v_increment)

  v_sqrt = math_ops.sqrt(v_t)
  var_update = state_ops.assign_sub(
      var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking)
  return control_flow_ops.group(var_update, m_t, v_t)
def _auc_hist_accumulate(hist_true, hist_false, nbins, collections):
  """Accumulates two histograms into new non-trainable variables.

  Args:
    hist_true: Histogram of scores for records labeled True.
    hist_false: Histogram of scores for records labeled False.
    nbins: Number of bins; each accumulator is initialized with `[nbins]`
      zeros.
    collections: Collections the accumulator variables are added to.

  Returns:
    A `(hist_true_acc, hist_false_acc, update_op)` tuple. `update_op` adds
    the two input histograms to their accumulators.
  """

  def _accumulator(var_name, hist):
    """Creates one zero-initialized accumulator matching `hist`'s dtype."""
    return variable_scope.get_variable(
        var_name,
        initializer=array_ops.zeros_initializer([nbins], dtype=hist.dtype),
        collections=collections,
        trainable=False)

  with variable_scope.variable_op_scope(
      [hist_true, hist_false], None, 'hist_accumulate'):
    # Holds running total histogram of scores for records labeled True.
    hist_true_acc = _accumulator('hist_true_acc', hist_true)
    # Holds running total histogram of scores for records labeled False.
    hist_false_acc = _accumulator('hist_false_acc', hist_false)

    update_op = control_flow_ops.group(
        hist_true_acc.assign_add(hist_true),
        hist_false_acc.assign_add(hist_false),
        name='update_op')

    return hist_true_acc, hist_false_acc, update_op
def build_graph(device, input_shape, perm, datatype, num_iters):
  """Builds a graph containing a sequence of transpose operations.

  The input is a constant holding 1..prod(input_shape). Every transpose
  after the first carries a control dependency on the previous one.

  Args:
    device: String, the device to run on.
    input_shape: Shape of the input tensor.
    perm: A list of ints with the same length as input tensor's dimension.
    datatype: numpy data type of the input tensor.
    num_iters: number of iterations to run transpose.

  Returns:
    A single grouped op over all the transposes, to run().
  """
  with ops.device("/%s:0" % device):
    element_count = np.prod(input_shape)
    values = np.arange(1, element_count + 1, dtype=datatype)
    source = constant_op.constant(
        values.reshape(input_shape), shape=input_shape)

    latest = array_ops.transpose(source, perm)
    transposes = [latest]
    for _ in range(1, num_iters):
      with ops.control_dependencies([latest]):
        latest = array_ops.transpose(source, perm)
      transposes.append(latest)
    return control_flow_ops.group(*transposes)
def _between_graph_with_monitored_session(self, strategy):
  """Runs one training step in a MonitoredSession and checks the result.

  Two resource variables are created on separate ps tasks. One is
  incremented and the other decremented by 2.0. After the workers are
  synchronized the values must be 16.0 and 14.0, and a success is then
  recorded in `self._result_correct`.

  Args:
    strategy: Unused by this function body.
  """
  worker_context = distribute_coordinator_context.get_current_worker_context()
  self.assertTrue(worker_context is not None)

  with ops.device("/job:ps/task:0"):
    # TODO(yuefengz): investigate why not using resource variable will make
    # the test flaky.
    x = variable_scope.get_variable(
        "xx", initializer=10.0, use_resource=True)
  with ops.device("/job:ps/task:1"):
    y = variable_scope.get_variable(
        "yy", initializer=20.0, use_resource=True)

  x_add = x.assign_add(2.0)
  y_sub = y.assign_sub(2.0)
  train_op = control_flow_ops.group([x_add, y_sub])

  # The monitored session will run init or ready ops.
  with monitored_session.MonitoredSession() as sess:
    sess.run(train_op)

    # Synchronize workers after one step to make sure they all have
    # finished training.
    if worker_context.has_barrier:
      worker_context.wait_for_other_workers()
    else:
      self._barrier.wait()

    fetched = sess.run([x, y])
    x_val, y_val = fetched

  self.assertEqual(x_val, 16.0)
  self.assertEqual(y_val, 14.0)
  if x_val == 16.0 and y_val == 14.0:
    with self._lock:
      self._result_correct += 1
def _get_train_ops(self, features, targets):
  """See base class.

  Builds the loss, computes gradients for the DNN and linear variables
  together, optionally clips them by global norm, and groups the two sets
  of training ops. The returned op increments the global step after the
  combined training op.
  """
  global_step = contrib_variables.get_global_step()
  assert global_step

  logits = self._logits(features, is_training=True)
  centered_bias_step = []
  if self._enable_centered_bias:
    centered_bias_step.append(self._centered_bias_step(targets, features))
  with ops.control_dependencies(centered_bias_step):
    loss = self._loss(logits, targets, features)
  logging_ops.scalar_summary("loss", loss)

  linear_vars = self._get_linear_vars()
  dnn_vars = self._get_dnn_vars()
  # DNN variables come first so the gradient list can be split by position.
  grads = gradients.gradients(loss, dnn_vars + linear_vars)
  if self._gradient_clip_norm:
    grads, _ = clip_ops.clip_by_global_norm(grads, self._gradient_clip_norm)

  dnn_count = len(dnn_vars)
  dnn_grads = grads[:dnn_count]
  linear_grads = grads[dnn_count:]

  linear_ops = self._get_linear_training_ops(linear_grads, linear_vars)
  dnn_ops = self._get_dnn_training_ops(dnn_grads, dnn_vars)
  train_ops = linear_ops + dnn_ops

  train_step = control_flow_ops.group(
      *train_ops, name="combined_training_op")
  with ops.control_dependencies([train_step]):
    with ops.get_default_graph().colocate_with(global_step):
      increment = state_ops.assign_add(global_step, 1)
      return increment.op, loss
def loop_body(j):
  """Writes 10.0 at index `j` of both targets and returns `[j + 1]`.

  The incremented index carries a dependency on both scatter updates.
  """
  scatter_updates = control_flow_ops.group(
      tf.scatter_update(select1, j, 10.0),
      tf.scatter_update(select2, j, 10.0))
  next_j = tf.add(j, 1)
  return [control_flow_ops.with_dependencies([scatter_updates], next_j)]
def _AddShardedRestoreOps(self, filename_tensor, per_device,
                          restore_sequentially, reshape):
  """Add Ops to restore variables from multiple devices.

  One restore op is built per shard, under that shard's device scope.

  Args:
    filename_tensor: Tensor for the path of the file to load.
    per_device: A list of (device, _VarToSave) pairs, as returned by
      _GroupByDevices().
    restore_sequentially: True if we want to restore variables sequentially
      within a shard.
    reshape: True if we want to reshape loaded tensors to the shape of the
      corresponding variable.

  Returns:
    An Operation that restores the variables.
  """
  shard_restore_ops = []
  shard_index = 0
  for device, vars_to_save in per_device:
    with ops.device(device):
      restore_op = self._AddRestoreOps(
          filename_tensor,
          vars_to_save,
          restore_sequentially,
          reshape,
          preferred_shard=shard_index,
          name="restore_shard")
      shard_restore_ops.append(restore_op)
    shard_index += 1
  return control_flow_ops.group(*shard_restore_ops, name="restore_all")
def apply(self, var_list=None):
  """Creates moving-average slots for `var_list` and the op updating them.

  Args:
    var_list: Variables or tensors to average. Defaults to
      `variables.trainable_variables()` when None.

  Returns:
    A grouped op that runs `self._num_updates_op` and one moving-average
    assignment per entry of `var_list`.

  Raises:
    TypeError: If an entry is not float32 or float64.
    ValueError: If an entry already has a moving average.
  """
  # TODO(touts): op_scope
  if var_list is None:
    var_list = variables.trainable_variables()

  supported_dtypes = [dtypes.float32, dtypes.float64]
  for var in var_list:
    if var.dtype.base_dtype not in supported_dtypes:
      raise TypeError("The variables must be float or double: %s" % var)
    if var in self._averages:
      raise ValueError("Moving average already computed for: %s" % var)

    # For variables: to lower communication bandwidth across devices we keep
    # the moving averages on the same device as the variables. For other
    # tensors, we rely on the existing device allocation mechanism.
    if not isinstance(var, variables.Variable):
      colocate = var.op.type == "Variable"
      avg = slot_creator.create_zeros_slot(
          var, self._name, colocate_with_primary=colocate)
    else:
      avg = slot_creator.create_slot(
          var, var.initialized_value(), self._name,
          colocate_with_primary=True)
    self._averages[var] = avg

  with ops.name_scope(self._name) as scope:
    decay = self._num_updates / (self._num_updates + 1)
    updates = [self._num_updates_op]
    updates.extend(
        assign_moving_average(self._averages[var], var, decay)
        for var in var_list)
    return control_flow_ops.group(*updates, name=scope)
def build_graph(device, n, m, k, transpose_a, transpose_b, dtype):
  """Build a graph containing a single matmul operation.

  The operands are non-resource variables initialized from random uniform
  values. Each is created already transposed when its flag is set, so the
  product is always [n, m] x [m, k].

  Args:
    device: String, the device to run on.
    n: tensor A's first dimension size.
    m: tensor A's second dimension size.
    k: tensor B's second dimension size.
    transpose_a: boolean value to show if tensor A is transposed.
    transpose_b: boolean value to show if tensor B is transposed.
    dtype: numpy data type of the input tensor.

  Returns:
    A matmul operation to run()
  """
  with ops.device('%s' % device):
    shape_a = [m, n] if transpose_a else [n, m]
    shape_b = [k, m] if transpose_b else [m, k]
    x = variables.VariableV1(
        random_ops.random_uniform(shape_a, dtype=dtype), use_resource=False)
    y = variables.VariableV1(
        random_ops.random_uniform(shape_b, dtype=dtype), use_resource=False)

    product = math_ops.matmul(
        x, y, transpose_a=transpose_a, transpose_b=transpose_b)
    return control_flow_ops.group(product)
def _get_train_ops(self, features, targets):
  """Method that builds model graph and returns trainer ops.

  The train op groups the graph builder's training graph with an
  increment of the global step. The loss is also stored on
  `self.training_loss`.

  Args:
    features: `Tensor` or `dict` of `Tensor` objects.
    targets: `Tensor` or `dict` of `Tensor` objects.

  Returns:
    Tuple of train `Operation` and loss `Tensor`.
  """
  features, spec = data_ops.ParseDataTensorOrDict(features)
  labels = data_ops.ParseLabelTensorOrDict(targets)

  graph_builder = self.graph_builder_class(
      self.params,
      device_assigner=self.device_assigner,
      **self.construction_args)

  # An epoch variable exists only when a data feeder is configured.
  epoch = self.data_feeder.make_epoch_variable() if self.data_feeder else None

  training_graph = graph_builder.training_graph(
      features, labels, data_spec=spec, epoch=epoch, **self.training_args)
  step_increment = state_ops.assign_add(
      contrib_framework.get_global_step(), 1)
  train = control_flow_ops.group(training_graph, step_increment)

  self.training_loss = graph_builder.training_loss()
  return train, self.training_loss
def evaluation_loop(master,
                    checkpoint_dir,
                    logdir,
                    num_evals=1,
                    eval_op=None,
                    eval_op_feed_dict=None,
                    final_op=None,
                    final_op_feed_dict=None,
                    summary_op=_USE_DEFAULT,
                    summary_op_feed_dict=None,
                    variables_to_restore=None,
                    eval_interval_secs=60,
                    max_number_of_evaluations=None,
                    session_config=None):
    """Runs TF-Slim's Evaluation Loop.

    Each iteration waits for a new checkpoint in `checkpoint_dir`, restores
    it into a managed session, runs `evaluation(...)`, and then sleeps for
    whatever remains of `eval_interval_secs`.

    Args:
        master: The BNS address of the TensorFlow master.
        checkpoint_dir: The directory where checkpoints are stored.
        logdir: The directory where the TensorFlow summaries are written to.
        num_evals: The number of times to run `eval_op`.
        eval_op: An operation run `num_evals` times.
        eval_op_feed_dict: The feed dictionary to use when executing the
            `eval_op`.
        final_op: An operation to execute after all of the `eval_op`
            executions. The value of `final_op` is returned.
        final_op_feed_dict: A feed dictionary to use when executing
            `final_op`.
        summary_op: The summary_op to evaluate after running TF-Slim's metric
            ops. By default the summary_op is set to the result of
            `logging_ops.merge_all_summaries()`.
        summary_op_feed_dict: An optional feed dictionary to use when running
            the `summary_op`.
        variables_to_restore: A list of TensorFlow variables to restore
            during evaluation. If the argument is left as `None` (or is
            otherwise falsy) then `variables.get_variables_to_restore()` is
            used.
        eval_interval_secs: The minimum number of seconds between
            evaluations.
        max_number_of_evaluations: The max number of iterations of the
            evaluation. If the value is left as `None` (or is otherwise
            falsy), the evaluation continues indefinitely.
        session_config: An instance of `tf.ConfigProto` that will be used to
            configure the `Session`. If left as `None`, the default will be
            used.

    Returns:
        The value returned by the last call to `evaluation(...)`; per the
        original documentation this is the value of `final_op`, or `None` if
        `final_op` is `None`.
    """
    if summary_op == _USE_DEFAULT:
        summary_op = logging_ops.merge_all_summaries()

    global_step = variables.get_or_create_global_step()

    init_op = control_flow_ops.group(tf_variables.initialize_all_variables(),
                                     tf_variables.initialize_local_variables(),
                                     data_flow_ops.initialize_all_tables())

    saver = tf_saver.Saver(variables_to_restore or
                           variables.get_variables_to_restore())

    summary_writer = summary_io.SummaryWriter(logdir)

    # The supervisor is given no summary op, summary writer, or global step;
    # summaries are written through `summary_writer` passed to `evaluation`.
    sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                               logdir=logdir,
                               init_op=init_op,
                               summary_op=None,
                               summary_writer=None,
                               global_step=None,
                               saver=saver)

    last_checkpoint = None
    number_of_evaluations = 0
    while True:
        # Passing the previous checkpoint lets the helper wait for a new one.
        last_checkpoint = wait_for_new_checkpoint(checkpoint_dir, last_checkpoint)
        start = time.time()
        logging.info('Starting evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                               time.gmtime()))

        with sv.managed_session(master,
                                start_standard_services=False,
                                config=session_config) as sess:
            sv.saver.restore(sess, last_checkpoint)
            sv.start_queue_runners(sess)
            final_op_value = evaluation(
                sess,
                num_evals=num_evals,
                eval_op=eval_op,
                eval_op_feed_dict=eval_op_feed_dict,
                final_op=final_op,
                final_op_feed_dict=final_op_feed_dict,
                summary_op=summary_op,
                summary_op_feed_dict=summary_op_feed_dict,
                summary_writer=summary_writer,
                global_step=global_step)

        logging.info('Finished evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                               time.gmtime()))
        number_of_evaluations += 1
        # A falsy limit (None or 0) disables the cap entirely.
        if (max_number_of_evaluations and
                number_of_evaluations >= max_number_of_evaluations):
            logging.info('Reached max_number_of_evaluations=%s. Exit',
                         max_number_of_evaluations)
            break

        # Sleep only for the part of the interval the evaluation did not use.
        time_to_next_eval = start + eval_interval_secs - time.time()
        if time_to_next_eval > 0:
            time.sleep(time_to_next_eval)

    return final_op_value
def minimize(self, global_step=None, name=None):
    """Add operations to train a linear model by minimizing the loss function.

    Args:
        global_step: Optional `Variable` to increment by one after the
            variables have been updated.
        name: Optional name for the returned operation.

    Returns:
        An Operation that updates the variables passed in the constructor.
        When `global_step` is `None` this is the L1-shrink op; otherwise it
        is the op of the `assign_add` on `global_step`, which runs after the
        shrink step.
    """
    # Technically, the op depends on a lot more than the variables,
    # but we'll keep the list short.
    with op_scope([], name, 'sdca/minimize'):
        sparse_features_indices = []
        sparse_features_values = []
        for sf in self._examples['sparse_features']:
            sparse_features_indices.append(convert_to_tensor(sf.indices))
            sparse_features_values.append(convert_to_tensor(sf.values))

        example_ids_hashed = _sdca_ops.sdca_fprint(
            convert_to_tensor(self._examples['example_ids']))
        example_state_data = self._hashtable.lookup(example_ids_hashed)

        example_state_data_updated = _sdca_ops.sdca_solver(
            sparse_features_indices,
            sparse_features_values,
            self._convert_n_to_tensor(self._examples['dense_features']),
            convert_to_tensor(self._examples['example_weights']),
            convert_to_tensor(self._examples['example_labels']),
            self._convert_n_to_tensor(
                self._slots['unshrinked_sparse_features_weights'],
                as_ref=True),
            self._convert_n_to_tensor(
                self._slots['unshrinked_dense_features_weights'],
                as_ref=True),
            example_state_data,
            l1=self._options['symmetric_l1_regularization'],
            l2=self._symmetric_l2_regularization(),
            # TODO(sibyl-Aix6ihai): Provide empirical evidence for this. It is
            # better to run more than one iteration on single mini-batch as we
            # want to spend more time in compute. SDCA works better with
            # larger mini-batches and there is also recent work that shows its
            # better to reuse old samples than train on new samples.
            # See: http://arxiv.org/abs/1602.02136.
            num_inner_iterations=2,
            loss_type=self._options['loss_type'])

        with ops.control_dependencies([example_state_data_updated]):
            insert_op = self._hashtable.insert(example_ids_hashed,
                                               example_state_data_updated)
            update_ops = [insert_op]
            # Copy the unshrinked slot weights into the user-visible
            # variables. The loop variable must not be called `name`: that
            # would overwrite the caller-supplied op name used below.
            for weights_key in ('sparse_features_weights',
                                'dense_features_weights'):
                for var, slot_var in zip(
                        self._variables[weights_key],
                        self._slots['unshrinked_' + weights_key]):
                    update_ops.append(var.assign(slot_var))

            update_group = control_flow_ops.group(*update_ops)

            with ops.control_dependencies([update_group]):
                shrink_l1 = _sdca_ops.sdca_shrink_l1(
                    self._convert_n_to_tensor(
                        self._variables['sparse_features_weights'],
                        as_ref=True),
                    self._convert_n_to_tensor(
                        self._variables['dense_features_weights'],
                        as_ref=True),
                    l1=self._options['symmetric_l1_regularization'],
                    l2=self._symmetric_l2_regularization())

        # Compare against None explicitly; truth-testing a tensor-like
        # object is not a reliable "was it provided" check.
        if global_step is None:
            return shrink_l1
        with ops.control_dependencies([shrink_l1]):
            return state_ops.assign_add(global_step, 1, name=name).op
def _default_local_init_op():
    """Return one op that initializes local variables and all tables."""
    local_variables_init = variables.local_variables_initializer()
    tables_init = data_flow_ops.initialize_all_tables()
    return control_flow_ops.group(local_variables_init, tables_init)
def _model_fn(features, labels, mode):
    """Function that returns predictions, training loss, and training op.

    Args:
        features: A `Tensor`, `SparseTensor`, or dict of features. A bare
            tensor is wrapped as `{'features': tensor}`.
        labels: Labels, or `None` (e.g. when doing prediction).
        mode: One of the `model_fn_lib.ModeKeys` values.

    Returns:
        The result of `model_head.create_model_fn_ops`, with training hooks
        appended and, when keys were supplied, the keys added to its
        predictions.
    """
    if isinstance(features, (ops.Tensor, sparse_tensor.SparseTensor)):
        features = {'features': features}

    # Weights and keys travel inside `features`; pull them out before the
    # inference graph is built.
    weights = (features.pop(weights_name)
               if weights_name and weights_name in features else None)
    keys = (features.pop(keys_name)
            if keys_name and keys_name in features else None)

    # If we're doing eval, optionally ignore device_assigner.
    # Also ignore device assigner if we're exporting (mode == INFER)
    skip_device_assigner = (
        mode == model_fn_lib.ModeKeys.INFER or
        (local_eval and mode == model_fn_lib.ModeKeys.EVAL))
    dev_assn = None if skip_device_assigner else device_assigner

    graph_builder = graph_builder_class(params, device_assigner=dev_assn)
    logits = graph_builder.inference_graph(features)

    # For binary classification problems, convert probabilities to logits.
    # Includes hack to get around the fact that a probability might be 0 or 1.
    is_binary_classifier = not params.regression and params.num_classes == 2
    if is_binary_classifier:
        class_1_probs = array_ops.slice(logits, [0, 1], [-1, 1])
        odds = class_1_probs / math_ops.maximum(1.0 - class_1_probs, EPSILON)
        logits = math_ops.log(math_ops.maximum(odds, EPSILON))

    # labels might be None if we're doing prediction (which brings up the
    # question of why we force everything to adhere to a single model_fn).
    training_graph = None
    training_hooks = []
    should_train = labels is not None and mode == model_fn_lib.ModeKeys.TRAIN
    if should_train:
        with ops.control_dependencies([logits.op]):
            forest_update = graph_builder.training_graph(
                features,
                labels,
                input_weights=weights,
                num_trainers=num_trainers,
                trainer_id=trainer_id)
            step_increment = state_ops.assign_add(
                contrib_framework.get_global_step(), 1)
            training_graph = control_flow_ops.group(
                forest_update, step_increment)

    # Put weights back in
    if weights is not None:
        features[weights_name] = weights

    # TensorForest's training graph isn't calculated directly from the loss
    # like many other models.
    def _train_fn(unused_loss):
        return training_graph

    model_ops = model_head.create_model_fn_ops(
        features=features,
        labels=labels,
        mode=mode,
        train_op_fn=_train_fn,
        logits=logits,
        scope=head_scope)

    if report_feature_importances:
        importances_hook = TensorForestRunOpAtEndHook({
            'feature_importances': graph_builder.feature_importances()
        })
        training_hooks.append(importances_hook)

    if early_stopping_rounds:
        early_stopping_hook = TensorForestLossHook(
            early_stopping_rounds,
            early_stopping_loss_threshold=early_stopping_loss_threshold,
            loss_op=model_ops.loss)
        training_hooks.append(early_stopping_hook)

    model_ops.training_hooks.extend(training_hooks)

    if keys is not None:
        model_ops.predictions[keys_name] = keys

    return model_ops
def _experimental_run_steps_on_iterator(self, fn, iterator, iterations,
                                        initial_loop_values=None):
    """Run `fn` repeatedly inside a `while_loop`, fed from `iterator`.

    Args:
        fn: Callable invoked as `fn(ctx, fn_inputs)` once per loop iteration,
            where `ctx` is the `values.MultiStepContext` created here and
            `fn_inputs` is a tuple built from `iterator.get_next()`.
        iterator: Object whose `get_next()` supplies the per-step inputs.
        iterations: Loop bound; the loop runs while the counter is
            `< iterations`.
        initial_loop_values: Optional structure of initial values. It is
            flattened and appended to the loop variables after the counter.
            Defaults to an empty dict.

    Returns:
        The `values.MultiStepContext`, with `run_op` set to a group of the
        loop results and its last-step outputs repacked into their original
        structure.
    """
    if initial_loop_values is None:
        initial_loop_values = {}
    initial_loop_values = nest.flatten(initial_loop_values)

    ctx = values.MultiStepContext()

    def body(i, *args):
        """A wrapper around `fn` to create the while loop body."""
        # The carried loop values are ignored; only the counter is used.
        del args
        fn_inputs = iterator.get_next()
        if not isinstance(fn_inputs, tuple):
            fn_inputs = (fn_inputs, )
        fn_result = fn(ctx, fn_inputs)
        for (name, output) in ctx.last_step_outputs.items():
            # Convert all outputs to tensors, potentially from `DistributedValues`.
            ctx.last_step_outputs[name] = self._unwrap(output)
        flat_last_step_outputs = nest.flatten(ctx.last_step_outputs)
        # Make the returned loop variables depend on `fn_result`.
        with ops.control_dependencies([fn_result]):
            return [i + 1] + flat_last_step_outputs

    # We capture the control_flow_context at this point, before we run `fn`
    # inside a while_loop. This is useful in cases where we might need to exit
    # these contexts and get back to the outer context to do some things, for
    # e.g. create an op which should be evaluated only once at the end of the
    # loop on the host. One such usage is in creating metrics' value op.
    self._outer_control_flow_context = (
        ops.get_default_graph()._get_control_flow_context())  # pylint: disable=protected-access

    cond = lambda i, *args: i < iterations
    i = constant_op.constant(0)
    loop_result = control_flow_ops.while_loop(cond,
                                              body,
                                              [i] + initial_loop_values,
                                              name="",
                                              parallel_iterations=1,
                                              back_prop=False,
                                              swap_memory=False,
                                              return_same_structure=True)
    # The captured outer context is only needed while the loop is built.
    del self._outer_control_flow_context

    ctx.run_op = control_flow_ops.group(loop_result)

    # Convert the last_step_outputs from a list to the original dict structure
    # of last_step_outputs.
    # Element 0 of `loop_result` is the loop counter, so it is skipped.
    last_step_tensor_outputs = loop_result[1:]
    last_step_tensor_outputs_dict = nest.pack_sequence_as(
        ctx.last_step_outputs, last_step_tensor_outputs)

    for name, reduce_op in ctx._last_step_outputs_reduce_ops.items():  # pylint: disable=protected-access
        output = last_step_tensor_outputs_dict[name]
        # For outputs that have already been reduced, wrap them in a Mirrored
        # container, else in a PerReplica container.
        if reduce_op is None:
            last_step_tensor_outputs_dict[name] = values.regroup(
                {d: t for d, t in zip(self._devices, output)},
                values.PerReplica)
        else:
            assert len(output) == 1
            last_step_tensor_outputs_dict[name] = output[0]

    ctx._set_last_step_outputs(last_step_tensor_outputs_dict)  # pylint: disable=protected-access
    return ctx
def training_graph(self,
                   input_data,
                   input_labels,
                   num_trainers=1,
                   trainer_id=0,
                   **tree_kwargs):
    """Constructs a TF graph for training a random forest.

    Args:
        input_data: A tensor or dict of string->Tensor for input data.
        input_labels: A tensor or placeholder for labels associated with
            input_data.
        num_trainers: Number of parallel trainers to split trees among.
        trainer_id: Which trainer this instance is.
        **tree_kwargs: Keyword arguments passed to each tree's
            training_graph.

    Returns:
        The last op in the random forest training graph.

    Raises:
        NotImplementedError: If trying to use bagging (of examples or of
            features) with sparse features.
    """
    processed_dense_features, processed_sparse_features, data_spec = (
        data_ops.ParseDataTensorOrDict(input_data))

    # NOTE(review): `labels` is only bound when `input_labels` is not None,
    # yet it is read unconditionally below (`tree_labels = labels`). Calling
    # this with `input_labels=None` looks like it would raise
    # UnboundLocalError -- confirm the intended behavior.
    if input_labels is not None:
        labels = data_ops.ParseLabelTensorOrDict(input_labels)

    data_spec = data_spec or self.get_default_data_spec(input_data)

    tree_graphs = []
    # This trainer handles the contiguous tree range
    # [tree_start, tree_end); the bounds are truncated to ints.
    trees_per_trainer = self.params.num_trees / num_trainers
    tree_start = int(trainer_id * trees_per_trainer)
    tree_end = int((trainer_id + 1) * trees_per_trainer)
    for i in range(tree_start, tree_end):
        with ops.device(self.variables.device_dummies[i].device):
            # A base seed of 0 is left untouched; any other seed is offset
            # by the tree index so each tree gets a different seed.
            seed = self.params.base_random_seed
            if seed != 0:
                seed += i
            # If using bagging, randomly select some of the input.
            tree_data = processed_dense_features
            tree_labels = labels
            if self.params.bagging_fraction < 1.0:
                # TODO(gilberth): Support bagging for sparse features.
                if processed_sparse_features is not None:
                    raise NotImplementedError(
                        'Bagging not supported with sparse features.')
                # TODO(thomaswc): This does sampling without replacement.  Consider
                # also allowing sampling with replacement as an option.
                batch_size = array_ops.strided_slice(
                    array_ops.shape(processed_dense_features), [0], [1])
                r = random_ops.random_uniform(batch_size, seed=seed)
                mask = math_ops.less(
                    r, array_ops.ones_like(r) * self.params.bagging_fraction)
                gather_indices = array_ops.squeeze(array_ops.where(mask),
                                                   squeeze_dims=[1])
                # TODO(thomaswc): Calculate out-of-bag data and labels, and store
                # them for use in calculating statistics later.
                tree_data = array_ops.gather(processed_dense_features,
                                             gather_indices)
                tree_labels = array_ops.gather(labels, gather_indices)
            if self.params.bagged_features:
                if processed_sparse_features is not None:
                    raise NotImplementedError(
                        'Feature bagging not supported with sparse features.'
                    )
                tree_data = self._bag_features(i, tree_data)

            tree_graphs.append(self.trees[i].training_graph(
                tree_data,
                tree_labels,
                seed,
                data_spec=data_spec,
                sparse_features=processed_sparse_features,
                **tree_kwargs))

    return control_flow_ops.group(*tree_graphs, name='train')
def initializer(self):
    """Return one op grouping the initializers of all component values."""
    # Collect the initializer of every component value of the mirrored
    # variable held in `self._index`, then run them as a single group.
    component_initializers = [
        component.initializer for component in self._index.values()
    ]
    return control_flow_ops.group(component_initializers)
def _call_for_each_tower(self, fn, *args, **kwargs):
    """Run `fn` on TPU shards, feeding `PerIteration` inputs via infeed.

    Arguments that are `values.PerIteration` instances are enqueued from
    the host and dequeued on the device; all other arguments are passed to
    `fn` unchanged.

    Args:
        fn: Callable invoked as `fn(*args, **kwargs)` with fed inputs
            replaced by their dequeued values.
        *args: Positional arguments for `fn`.
        **kwargs: Keyword arguments for `fn`. A `run_concurrently` key, if
            present, is dropped and not forwarded.

    Returns:
        An op grouping the TPU computation with the host enqueue loop.

    Raises:
        ValueError: If any fed input does not have a fully defined shape.
    """
    kwargs.pop('run_concurrently', None)

    inputs = {'args': args, 'kwargs': kwargs}
    flat_inputs = nest.flatten(inputs)

    # True at positions of `flat_inputs` that must go through the infeed.
    feed_mask = [isinstance(f, values.PerIteration) for f in flat_inputs]

    # A lambda because `itertools.compress` returns a one-shot iterator and
    # the fed inputs are traversed several times below.
    feeds = lambda: itertools.compress(flat_inputs, feed_mask)
    shapes = [f.get_shape() for f in feeds()]
    if any([not s.is_fully_defined() for s in shapes]):
        raise ValueError(
            'TPU currently requires fully defined shapes. Either use '
            'set_shape() on the input tensors or use '
            'dataset.apply(map_and_batch(..., drop_remainder=True)).')
    types = [f.get_dtype() for f in feeds()]

    def infeed_input(i):
        """Get input, split it and then enqueue."""
        iteration_inputs = [f.get(i) for f in feeds()]
        # Regroup from per-input lists into one list of inputs per core.
        infeed_inputs = [[inputs_per_core[core_id]
                          for inputs_per_core in iteration_inputs]
                         for core_id in range(self._num_cores_per_host)]

        infeed_ops = []
        for core_id, infeed_input in enumerate(infeed_inputs):
            infeed_ops.append(
                tpu_ops.infeed_enqueue_tuple(
                    inputs=infeed_input, shapes=shapes, device_ordinal=core_id))

        # Advance the counter only after this iteration's enqueues.
        with ops.control_dependencies(infeed_ops):
            return i + 1

    with ops.device('/task:0/device:CPU:0'):
        enqueue_ops = control_flow_ops.while_loop(
            lambda i: i < self._iterations_per_step,
            infeed_input, [constant_op.constant(0)],
            parallel_iterations=1)

    def dequeueing_fn(*args, **kwargs):
        """Dequeue input arguments and supply them to `fn`."""
        del args, kwargs
        dequeued = tpu.infeed_dequeue_tuple(dtypes=types, shapes=shapes)
        dequeued = iter(dequeued)

        # Rebuild the flat argument list, substituting a dequeued tensor at
        # each fed position and keeping the original value elsewhere.
        fn_inputs = []
        for inp, is_feed in zip(flat_inputs, feed_mask):
            if is_feed:
                fn_inputs.append(next(dequeued))
            else:
                fn_inputs.append(inp)

        fn_inputs = nest.pack_sequence_as(inputs, fn_inputs)
        return fn(*fn_inputs['args'], **fn_inputs['kwargs'])

    def iterate_on_tpu():
        return tpu.repeat(self._iterations_per_step, dequeueing_fn, [])

    with one_device_strategy._OneDeviceTowerContext(self):  # pylint: disable=protected-access
        tpu_result = tpu.batch_parallel(
            iterate_on_tpu, [], num_shards=self._num_cores_per_host)

    return control_flow_ops.group(tpu_result, enqueue_ops)
def initializer(self):
    """Return one op grouping the initializer of every held iterator."""
    iterator_initializers = [
        it.initializer for it in self._iterators.values()
    ]
    return control_flow_ops.group(iterator_initializers)
def initialize_iterator(iterator, distribution_strategy):
    """Build the iterator's init op and, in graph mode, run it.

    Args:
        iterator: Object whose `initialize()` yields the op(s) to group.
        distribution_strategy: Strategy whose scope the op is built under.
    """
    with distribution_strategy.scope():
        init_op = control_flow_ops.group(iterator.initialize())
        if context.executing_eagerly():
            # No session run is issued when executing eagerly.
            return
        session = K.get_session((init_op,))
        session.run(init_op)
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      top_k=None,
                                      class_id=None,
                                      sample_weight=None,
                                      multi_label=False,
                                      label_weights=None):
    """Returns op to update the given confusion matrix variables.

    For every pair of values in y_true and y_pred:

    true_positive: y_true == True and y_pred > thresholds
    false_negatives: y_true == True and y_pred <= thresholds
    true_negatives: y_true == False and y_pred <= thresholds
    false_positive: y_true == False and y_pred > thresholds

    The results will be weighted and added together. When multiple thresholds
    are provided, we will repeat the same for every threshold.

    For estimation of these metrics over a stream of data, the function
    creates an `update_op` operation that updates the given variables.

    If `sample_weight` is `None`, weights default to 1.
    Use weights of 0 to mask values.

    Args:
        variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid
            keys and corresponding variables to update as values.
        y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to
            `bool`.
        y_pred: A floating point `Tensor` of arbitrary shape and whose values
            are in the range `[0, 1]`.
        thresholds: A float value, float tensor, python list, or tuple of
            float thresholds in `[0, 1]`, or NEG_INF (used when top_k is
            set).
        top_k: Optional int, indicates that the positive labels should be
            limited to the top k predictions.
        class_id: Optional int, limits the prediction and labels to the class
            specified by this argument.
        sample_weight: Optional `Tensor` whose rank is either 0, or the same
            rank as `y_true`, and must be broadcastable to `y_true` (i.e.,
            all dimensions must be either `1`, or the same as the
            corresponding `y_true` dimension).
        multi_label: Optional boolean indicating whether multidimensional
            prediction/labels should be treated as multilabel responses, or
            flattened into a single label. When True, the values of
            `variables_to_update` must have a second dimension equal to the
            number of labels in y_true and y_pred, and those tensors must not
            be RaggedTensors.
        label_weights: (optional) tensor of non-negative weights for
            multilabel data. The weights are applied when calculating TP, FP,
            FN, and TN without explicit multilabel handling (i.e. when the
            data is to be flattened).

    Returns:
        Update op, or `None` when `variables_to_update` is `None`.

    Raises:
        ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
            `sample_weight` is not `None` and its shape doesn't match
            `y_pred`, or if `variables_to_update` contains invalid keys, or
            if `label_weights` is given together with `multi_label=True`.
    """
    if multi_label and label_weights is not None:
        raise ValueError(
            '`label_weights` for multilabel data should be handled '
            'outside of `update_confusion_matrix_variables` when '
            '`multi_label` is True.')
    if variables_to_update is None:
        return
    if not any(key for key in variables_to_update
               if key in list(ConfusionMatrix)):
        raise ValueError(
            'Please provide at least one valid confusion matrix '
            'variable to update. Valid variable key options are: "{}". '
            'Received: "{}"'.format(list(ConfusionMatrix),
                                    variables_to_update.keys()))

    # All inputs are cast to the dtype of the first variable to update.
    variable_dtype = list(variables_to_update.values())[0].dtype

    y_true = math_ops.cast(y_true, dtype=variable_dtype)
    y_pred = math_ops.cast(y_pred, dtype=variable_dtype)
    thresholds = ops.convert_to_tensor_v2(thresholds, dtype=variable_dtype)
    num_thresholds = thresholds.shape[0]
    if multi_label:
        # True when `thresholds` has rank 1.
        one_thresh = math_ops.equal(math_ops.cast(1, dtype=dtypes.int32),
                                    array_ops.rank(thresholds),
                                    name='one_set_of_thresholds_cond')
    else:
        [y_pred, y_true
        ], _ = ragged_assert_compatible_and_get_flat_values([y_pred, y_true],
                                                            sample_weight)
        one_thresh = math_ops.cast(True, dtype=dtypes.bool)

    invalid_keys = [
        key for key in variables_to_update if key not in list(ConfusionMatrix)
    ]
    if invalid_keys:
        raise ValueError(
            'Invalid keys: {}. Valid variable key options are: "{}"'.format(
                invalid_keys, list(ConfusionMatrix)))

    # The range assertions become control dependencies of the ops created
    # inside this block.
    with ops.control_dependencies([
            check_ops.assert_greater_equal(y_pred,
                                           math_ops.cast(0.0, dtype=y_pred.dtype),
                                           message='predictions must be >= 0'),
            check_ops.assert_less_equal(y_pred,
                                        math_ops.cast(1.0, dtype=y_pred.dtype),
                                        message='predictions must be <= 1')
    ]):
        if sample_weight is None:
            y_pred, y_true = tf_losses_utils.squeeze_or_expand_dimensions(
                y_pred, y_true)
        else:
            y_pred, y_true, sample_weight = (
                tf_losses_utils.squeeze_or_expand_dimensions(
                    y_pred, y_true, sample_weight=sample_weight))
    y_pred.shape.assert_is_compatible_with(y_true.shape)

    if top_k is not None:
        y_pred = _filter_top_k(y_pred, top_k)
    if class_id is not None:
        y_true = y_true[..., class_id]
        y_pred = y_pred[..., class_id]

    pred_shape = array_ops.shape(y_pred)
    num_predictions = pred_shape[0]
    if y_pred.shape.ndims == 1:
        num_labels = 1
    else:
        # Product of every dimension after the first.
        num_labels = gen_math_ops.Prod(input=pred_shape[1:], axis=0)
    thresh_label_tile = control_flow_ops.cond(
        one_thresh, lambda: num_labels,
        lambda: math_ops.cast(1, dtype=dtypes.int32))

    # Reshape predictions and labels, adding a dim for thresholding.
    if multi_label:
        predictions_extra_dim = array_ops.expand_dims(y_pred, 0)
        labels_extra_dim = array_ops.expand_dims(
            math_ops.cast(y_true, dtype=dtypes.bool), 0)
    else:
        # Flatten predictions and labels when not multilabel.
        predictions_extra_dim = array_ops.reshape(y_pred, [1, -1])
        labels_extra_dim = array_ops.reshape(
            math_ops.cast(y_true, dtype=dtypes.bool), [1, -1])

    # Tile the thresholds for every prediction.
    if multi_label:
        thresh_pretile_shape = [num_thresholds, 1, -1]
        thresh_tiles = [1, num_predictions, thresh_label_tile]
        data_tiles = [num_thresholds, 1, 1]
    else:
        thresh_pretile_shape = [num_thresholds, -1]
        thresh_tiles = [1, num_predictions * num_labels]
        data_tiles = [num_thresholds, 1]

    thresh_tiled = array_ops.tile(
        array_ops.reshape(thresholds, thresh_pretile_shape),
        array_ops.stack(thresh_tiles))

    # Tile the predictions for every threshold.
    preds_tiled = array_ops.tile(predictions_extra_dim, data_tiles)

    # Compare predictions and threshold.
    pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)

    # Tile labels by number of thresholds
    label_is_pos = array_ops.tile(labels_extra_dim, data_tiles)

    if sample_weight is not None:
        sample_weight = weights_broadcast_ops.broadcast_weights(
            math_ops.cast(sample_weight, dtype=variable_dtype), y_pred)
        weights_tiled = array_ops.tile(
            array_ops.reshape(sample_weight, thresh_tiles), data_tiles)
    else:
        weights_tiled = None

    if label_weights is not None and not multi_label:
        label_weights = array_ops.expand_dims(label_weights, 0)
        label_weights = weights_broadcast_ops.broadcast_weights(
            label_weights, y_pred)
        label_weights_tiled = array_ops.tile(
            array_ops.reshape(label_weights, thresh_tiles), data_tiles)
        # Combine with sample weights when both are present.
        if weights_tiled is None:
            weights_tiled = label_weights_tiled
        else:
            weights_tiled = math_ops.multiply(weights_tiled,
                                              label_weights_tiled)

    update_ops = []

    def weighted_assign_add(label, pred, weights, var):
        # Adds the (optionally weighted) count of positions where both
        # `label` and `pred` are true, summed over axis 1, to `var`.
        label_and_pred = math_ops.cast(math_ops.logical_and(label, pred),
                                       dtype=var.dtype)
        if weights is not None:
            label_and_pred *= math_ops.cast(weights, dtype=var.dtype)
        return var.assign_add(math_ops.reduce_sum(label_and_pred, 1))

    loop_vars = {
        ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
    }
    update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
    update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
    update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

    # Negated masks are only built when a requested variable needs them.
    if update_fn or update_tn:
        pred_is_neg = math_ops.logical_not(pred_is_pos)
        loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos, pred_is_neg)

    if update_fp or update_tn:
        label_is_neg = math_ops.logical_not(label_is_pos)
        loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg, pred_is_pos)
        if update_tn:
            loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg,
                                                         pred_is_neg)

    for matrix_cond, (label, pred) in loop_vars.items():
        if matrix_cond in variables_to_update:
            update_ops.append(
                weighted_assign_add(label, pred, weights_tiled,
                                    variables_to_update[matrix_cond]))

    return control_flow_ops.group(update_ops)
def _fusion_checkpoint_assignments(reader, var_list, model_scope):
    """Build placeholder-fed assign ops for one checkpoint/variable set.

    Args:
        reader: Checkpoint reader providing `has_tensor` and `get_tensor`.
        var_list: A list/tuple of variables, a dict mapping names to
            variables, or `None`/empty for "nothing to restore".
        model_scope: Substring of the graph variable name that is replaced
            by "vgg_19" to obtain the name stored in the checkpoint.

    Returns:
        A `(assign_ops, feed_dict)` pair: the list of assign ops and a dict
        mapping each placeholder to the checkpoint value to feed it.

    Raises:
        ValueError: If the checkpoint lacks a variable or its shape differs
            from the graph variable's shape.
    """
    assign_ops = []
    feed_dict = {}
    if not var_list:
        return assign_ops, feed_dict
    if isinstance(var_list, (tuple, list)):
        var_list = {var.op.name: var for var in var_list}

    for graph_var_name in var_list:
        var = var_list[graph_var_name]
        # Names are byte strings on Python 2 but already text on Python 3,
        # where `str` has no `.decode`; only decode when it is needed.
        if isinstance(graph_var_name, bytes):
            text_name = graph_var_name.decode("utf-8")
        else:
            text_name = graph_var_name
        checkpoint_var_name = text_name.replace(model_scope, u"vgg_19")

        if not reader.has_tensor(checkpoint_var_name):
            raise ValueError(
                'Checkpoint is missing variable [%s]' % checkpoint_var_name)
        var_value = reader.get_tensor(checkpoint_var_name)

        # Validate before creating graph ops so a failure leaves no
        # orphaned placeholder behind.
        if var.get_shape() != var_value.shape:
            raise ValueError(
                'Total size of new array must be unchanged for %s '
                'lh_shape: [%s], rh_shape: [%s]'
                % (checkpoint_var_name, str(var_value.shape),
                   str(var.get_shape())))

        placeholder_value = array_ops.placeholder(
            dtype=var.dtype.base_dtype,
            shape=var.get_shape(),
            name='placeholder/' + var.op.name)
        assign_ops.append(var.assign(placeholder_value))
        feed_dict[placeholder_value] = var_value.reshape(var.get_shape())

    return assign_ops, feed_dict


def assign_from_checkpoint_fusion(model_path_1, var_list_1, model_path_2,
                                  var_list_2):
    """Creates an operation to assign variables from two checkpoints.

    Variables in `var_list_1` are looked up in the first checkpoint with
    "vgg_19_face" in their name replaced by "vgg_19"; variables in
    `var_list_2` are looked up in the second checkpoint with "vgg_19_iris"
    replaced by "vgg_19".

    Args:
        model_path_1: The full path to the first model checkpoint. To get
            the latest checkpoint use
            `model_path = tf.train.latest_checkpoint(checkpoint_dir)`.
        var_list_1: A list of `Variable` objects or a dictionary mapping
            names to the corresponding variables to initialize from the
            first checkpoint. If empty or `None`, it contributes nothing.
        model_path_2: The full path to the second model checkpoint.
        var_list_2: Same as `var_list_1`, for the second checkpoint.

    Returns:
        The restore op (a group of all assign ops) and the feed_dict that
        must be supplied when running it to restore the variables.

    Raises:
        ValueError: If a checkpoint is missing one of the requested
            variables, or a stored value's shape does not match the
            variable's shape.
    """
    reader_1 = pywrap_tensorflow.NewCheckpointReader(model_path_1)
    reader_2 = pywrap_tensorflow.NewCheckpointReader(model_path_2)

    assign_ops, feed_dict = _fusion_checkpoint_assignments(
        reader_1, var_list_1, u"vgg_19_face")
    assign_ops_2, feed_dict_2 = _fusion_checkpoint_assignments(
        reader_2, var_list_2, u"vgg_19_iris")
    assign_ops.extend(assign_ops_2)
    feed_dict.update(feed_dict_2)

    assign_op = control_flow_ops.group(*assign_ops)
    return assign_op, feed_dict
def change_weights_op(self, v_new, w_new):
    """Return one op that assigns `v_new` to `self.v` and `w_new` to `self.w`."""
    assign_v = self.v.assign(v_new)
    assign_w = self.w.assign(w_new)
    return control_flow_ops.group([assign_v, assign_w])
def default_init_op():
    """Return one op initializing global variables and shared resources."""
    global_variables_init = variables.global_variables_initializer()
    shared_resources_init = resources.initialize_resources(
        resources.shared_resources())
    return control_flow_ops.group(global_variables_init, shared_resources_init)
def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
    """Apply a sparse RMSprop-style update to `var` at `indices`.

    Args:
        grad: Gradient values for the rows selected by `indices`.
        var: The resource variable to update.
        indices: Indices into `var` that `grad` applies to.
        apply_state: Optional dict keyed by `(device, dtype)` holding
            precomputed coefficients. When it is missing or has no entry
            for this variable, `self._fallback_apply_state` is used.

    Returns:
        With momentum enabled, the result of the corresponding
        `training_ops.resource_sparse_apply_*` call; otherwise a grouped op
        covering the variable update and the accumulator updates.
    """
    var_device, var_dtype = var.device, var.dtype.base_dtype
    coefficients = ((apply_state or {}).get((var_device, var_dtype))
                    or self._fallback_apply_state(var_device, var_dtype))

    rms = self.get_slot(var, "rms")
    if self._momentum:
        # With momentum, the whole update is delegated to a training op.
        mom = self.get_slot(var, "momentum")
        if self.centered:
            mg = self.get_slot(var, "mg")
            return training_ops.resource_sparse_apply_centered_rms_prop(
                var.handle,
                mg.handle,
                rms.handle,
                mom.handle,
                coefficients["lr_t"],
                coefficients["rho"],
                coefficients["momentum"],
                coefficients["epsilon"],
                grad,
                indices,
                use_locking=self._use_locking)
        else:
            return training_ops.resource_sparse_apply_rms_prop(
                var.handle,
                rms.handle,
                mom.handle,
                coefficients["lr_t"],
                coefficients["rho"],
                coefficients["momentum"],
                coefficients["epsilon"],
                grad,
                indices,
                use_locking=self._use_locking)
    else:
        # Without momentum the update is assembled from individual ops:
        # scale the whole `rms` accumulator by rho, then scatter-add the
        # scaled squared gradient at `indices`.
        rms_scaled_g_values = (grad * grad) * coefficients["one_minus_rho"]
        rms_t = state_ops.assign(rms, rms * coefficients["rho"],
                                 use_locking=self._use_locking)
        with ops.control_dependencies([rms_t]):
            rms_t = self._resource_scatter_add(rms, indices, rms_scaled_g_values)
            rms_slice = array_ops.gather(rms_t, indices)
        denom_slice = rms_slice
        if self.centered:
            # Same decay-then-scatter pattern for the `mg` accumulator,
            # whose square is subtracted from the denominator.
            mg = self.get_slot(var, "mg")
            mg_scaled_g_values = grad * coefficients["one_minus_rho"]
            mg_t = state_ops.assign(mg, mg * coefficients["rho"],
                                    use_locking=self._use_locking)
            with ops.control_dependencies([mg_t]):
                mg_t = self._resource_scatter_add(mg, indices, mg_scaled_g_values)
                mg_slice = array_ops.gather(mg_t, indices)
                denom_slice = rms_slice - math_ops.square(mg_slice)
        var_update = self._resource_scatter_add(
            var, indices, coefficients["neg_lr_t"] * grad / (
                math_ops.sqrt(denom_slice) + coefficients["epsilon"]))
        if self.centered:
            return control_flow_ops.group(*[var_update, rms_t, mg_t])
        return control_flow_ops.group(*[var_update, rms_t])
def just_update_steps():
    """Return one op that counts a bad step and zeroes the good-step count."""
    # When bad_steps is incremented, good_step is reset.
    increment_bad_steps = state_ops.assign_add(self._num_bad_steps, 1)
    reset_good_steps = state_ops.assign(self._num_good_steps, 0)
    return control_flow_ops.group(increment_bad_steps, reset_good_steps)
def _reset_stats(self):
    """Return one op that sets the good-step and bad-step counters to 0."""
    reset_good_steps = state_ops.assign(self._num_good_steps, 0)
    reset_bad_steps = state_ops.assign(self._num_bad_steps, 0)
    return control_flow_ops.group(reset_good_steps, reset_bad_steps)
def _wals_factorization_model_function(features, labels, mode, params):
    """Model function for the WALSFactorization estimator.

    Builds a `factorization_ops.WALSModel` from `params` and, depending on
    `mode`, returns:
      * TRAIN: a `ModelFnOps` whose `train_op` is a no-op; the actual work is
        driven by the training hooks created here (a sweep hook, a global
        step hook and, when `max_sweeps` is set, a stop-at-sweep hook).
      * INFER: a `ModelFnOps` whose predictions hold the row or column
        projection, selected by the `PROJECT_ROW` feature.
      * EVAL: a `ModelFnOps` whose loss is the row or column loss (including
        the regularization term), selected by the `PROJECT_ROW` feature.

    Args:
      features: Dictionary of features. See WALSMatrixFactorization.
      labels: Must be None (checked with an `assert`).
      mode: A model_fn.ModeKeys object.
      params: Dictionary of parameters containing arguments passed to the
        WALSMatrixFactorization constructor.

    Returns:
      A ModelFnOps object.

    Raises:
      ValueError: If `mode` is not recognized.
    """
    assert labels is None
    # The caches are only enabled while training.
    use_factors_weights_cache = (
        params["use_factors_weights_cache_for_training"]
        and mode == model_fn.ModeKeys.TRAIN)
    use_gramian_cache = (params["use_gramian_cache_for_training"]
                         and mode == model_fn.ModeKeys.TRAIN)
    max_sweeps = params["max_sweeps"]
    model = factorization_ops.WALSModel(
        params["num_rows"],
        params["num_cols"],
        params["embedding_dimension"],
        unobserved_weight=params["unobserved_weight"],
        regularization=params["regularization_coeff"],
        row_init=params["row_init"],
        col_init=params["col_init"],
        num_row_shards=params["num_row_shards"],
        num_col_shards=params["num_col_shards"],
        row_weights=params["row_weights"],
        col_weights=params["col_weights"],
        use_factors_weights_cache=use_factors_weights_cache,
        use_gramian_cache=use_gramian_cache)

    # Get input rows and cols. We either update rows or columns depending on
    # the value of row_sweep, which is maintained using a session hook.
    input_rows = features[WALSMatrixFactorization.INPUT_ROWS]
    input_cols = features[WALSMatrixFactorization.INPUT_COLS]

    # TRAIN mode:
    if mode == model_fn.ModeKeys.TRAIN:
        # Training consists of the following ops (controlled using a SweepHook).
        # Before a row sweep:
        #   row_update_prep_gramian_op
        #   initialize_row_update_op
        # During a row sweep:
        #   update_row_factors_op
        # Before a col sweep:
        #   col_update_prep_gramian_op
        #   initialize_col_update_op
        # During a col sweep:
        #   update_col_factors_op

        # Non-trainable bookkeeping variables shared with the hooks.
        is_row_sweep_var = variable_scope.variable(
            True,
            trainable=False,
            name="is_row_sweep",
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        is_sweep_done_var = variable_scope.variable(
            False,
            trainable=False,
            name="is_sweep_done",
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        completed_sweeps_var = variable_scope.variable(
            0,
            trainable=False,
            name=WALSMatrixFactorization.COMPLETED_SWEEPS,
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        loss_var = variable_scope.variable(
            0.,
            trainable=False,
            name=WALSMatrixFactorization.LOSS,
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        # The root weighted squared error =
        #   \sqrt( \sum_{i,j} w_ij * (a_ij - r_ij)^2 / \sum_{i,j} w_ij )
        rwse_var = variable_scope.variable(
            0.,
            trainable=False,
            name=WALSMatrixFactorization.RWSE,
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])

        summary.scalar("loss", loss_var)
        summary.scalar("root_weighted_squared_error", rwse_var)
        summary.scalar("completed_sweeps", completed_sweeps_var)

        def create_axis_ops(sp_input, num_items, update_fn, axis_name):
            """Creates book-keeping and training ops for a given axis.

            Args:
              sp_input: A SparseTensor corresponding to the row or column
                batch.
              num_items: An integer, the total number of items of this axis.
              update_fn: A function that takes one argument (`sp_input`), and
                that returns a tuple of
                * new_factors: A float Tensor of the factor values after
                  update.
                * update_op: a TensorFlow op which updates the factors.
                * loss: A float Tensor, the unregularized loss.
                * reg_loss: A float Tensor, the regularization loss.
                * sum_weights: A float Tensor, the sum of factor weights.
              axis_name: A string that specifies the name of the axis.

            Returns:
              A tuple consisting of:
              * reset_processed_items_op: A TensorFlow op, to be run before
                the beginning of any sweep. It marks all items as
                not-processed.
              * axis_train_op: A Tensorflow op, to be run during this axis'
                sweeps.
            """
            # One boolean flag per item of this axis, initially all False.
            processed_items_init = array_ops.fill(dims=[num_items], value=False)
            with ops.colocate_with(processed_items_init):
                processed_items = variable_scope.variable(
                    processed_items_init,
                    collections=[ops.GraphKeys.GLOBAL_VARIABLES],
                    trainable=False,
                    name="processed_" + axis_name)
            _, update_op, loss, reg, sum_weights = update_fn(sp_input)
            # First column of the sparse indices selects the items in the batch.
            input_indices = sp_input.indices[:, 0]
            # Only mark items as processed once the factor update and the
            # loss / RWSE assignments have run.
            with ops.control_dependencies([
                    update_op,
                    state_ops.assign(loss_var, loss + reg),
                    state_ops.assign(rwse_var, math_ops.sqrt(loss / sum_weights))
            ]):
                with ops.colocate_with(processed_items):
                    update_processed_items = state_ops.scatter_update(
                        processed_items,
                        input_indices,
                        array_ops.ones_like(input_indices, dtype=dtypes.bool),
                        name="update_processed_{}_indices".format(axis_name))
                with ops.control_dependencies([update_processed_items]):
                    # The sweep is done when every item flag is True.
                    is_sweep_done = math_ops.reduce_all(processed_items)
                    axis_train_op = control_flow_ops.group(
                        state_ops.assign(is_sweep_done_var, is_sweep_done),
                        state_ops.assign_add(
                            completed_sweeps_var,
                            math_ops.cast(is_sweep_done, dtypes.int32)),
                        name="{}_sweep_train_op".format(axis_name))
            return processed_items.initializer, axis_train_op

        reset_processed_rows_op, row_train_op = create_axis_ops(
            input_rows,
            params["num_rows"],
            lambda x: model.update_row_factors(sp_input=x, transpose_input=False),
            "rows")
        reset_processed_cols_op, col_train_op = create_axis_ops(
            input_cols,
            params["num_cols"],
            lambda x: model.update_col_factors(sp_input=x, transpose_input=True),
            "cols")
        # Flips the row/col sweep flag and clears both processed-item trackers.
        switch_op = control_flow_ops.group(state_ops.assign(
            is_row_sweep_var, math_ops.logical_not(is_row_sweep_var)),
            reset_processed_rows_op,
            reset_processed_cols_op,
            name="sweep_switch_op")
        row_prep_ops = [
            model.row_update_prep_gramian_op, model.initialize_row_update_op
        ]
        col_prep_ops = [
            model.col_update_prep_gramian_op, model.initialize_col_update_op
        ]
        init_op = model.worker_init

        sweep_hook = _SweepHook(is_row_sweep_var, is_sweep_done_var, init_op,
                                row_prep_ops, col_prep_ops, row_train_op,
                                col_train_op, switch_op)
        global_step_hook = _IncrementGlobalStepHook()
        training_hooks = [sweep_hook, global_step_hook]
        if max_sweeps is not None:
            training_hooks.append(_StopAtSweepHook(max_sweeps))

        # The train_op itself is a no-op; the ops built above are handed to
        # the hooks.
        return model_fn.ModelFnOps(mode=model_fn.ModeKeys.TRAIN,
                                   predictions={},
                                   loss=loss_var,
                                   eval_metric_ops={},
                                   train_op=control_flow_ops.no_op(),
                                   training_hooks=training_hooks)

    # INFER mode
    elif mode == model_fn.ModeKeys.INFER:
        projection_weights = features.get(
            WALSMatrixFactorization.PROJECTION_WEIGHTS)

        def get_row_projection():
            return model.project_row_factors(
                sp_input=input_rows,
                projection_weights=projection_weights,
                transpose_input=False)

        def get_col_projection():
            return model.project_col_factors(
                sp_input=input_cols,
                projection_weights=projection_weights,
                transpose_input=True)

        # PROJECT_ROW selects between the row and the column projection.
        predictions = {
            WALSMatrixFactorization.PROJECTION_RESULT: control_flow_ops.cond(
                features[WALSMatrixFactorization.PROJECT_ROW],
                get_row_projection, get_col_projection)
        }

        return model_fn.ModelFnOps(mode=model_fn.ModeKeys.INFER,
                                   predictions=predictions,
                                   loss=None,
                                   eval_metric_ops={},
                                   train_op=control_flow_ops.no_op(),
                                   training_hooks=[])

    # EVAL mode
    elif mode == model_fn.ModeKeys.EVAL:

        def get_row_loss():
            _, _, loss, reg, _ = model.update_row_factors(
                sp_input=input_rows, transpose_input=False)
            return loss + reg

        def get_col_loss():
            _, _, loss, reg, _ = model.update_col_factors(sp_input=input_cols,
                                                          transpose_input=True)
            return loss + reg

        # PROJECT_ROW selects between the row and the column loss.
        loss = control_flow_ops.cond(
            features[WALSMatrixFactorization.PROJECT_ROW], get_row_loss,
            get_col_loss)
        return model_fn.ModelFnOps(mode=model_fn.ModeKeys.EVAL,
                                   predictions={},
                                   loss=loss,
                                   eval_metric_ops={},
                                   train_op=control_flow_ops.no_op(),
                                   training_hooks=[])

    else:
        raise ValueError("mode=%s is not recognized." % str(mode))
def training_ops(self):
    """Returns a single op grouping every op in `self._train_ops`."""
    train_ops = self._train_ops
    return control_flow_ops.group(*train_ops)
def decr_loss_scale():
    """Returns an op that shrinks the loss scale and resets step counters.

    The new scale is `loss_scale * decr_ratio`, floored at 1.
    """
    shrunk_scale = gen_math_ops.maximum(
        1., self._loss_scale * self._decr_ratio)
    update_op = state_ops.assign(self._loss_scale, shrunk_scale)
    # When loss_scale is updated, both good and bad steps are reset.
    reset_op = self._reset_stats()
    return control_flow_ops.group(update_op, reset_op)
def __init__(self, dataset, devices, max_buffer_size=1, prefetch_buffer_size=1, source_device="/cpu:0"):
    """Constructs a MultiDeviceIterator.

    In order to prevent deadlocks, if the prefetch_buffer_size is greater
    than the max_buffer_size, we set the max_buffer_size to
    prefetch_buffer_size.

    When executing eagerly, one-shot per-device iterators are created and a
    resource deleter is attached; otherwise initializable per-device
    iterators are created and their initializers are grouped into
    `self._initializer`.

    Args:
      dataset: The input dataset to be iterated over.
      devices: The list of devices to fetch data to.
      max_buffer_size: Maximum size of the host side per device buffer to
        keep.
      prefetch_buffer_size: if > 1, then we setup a buffer on each device to
        prefetch into.
      source_device: The host device to place the `dataset` on.

    NOTE(review): this docstring previously listed
    "Raises: RuntimeError: If run in Eager mode.", but the code below has
    explicit eager-mode branches and no visible raise -- confirm whether any
    callee still raises before relying on either behavior.
    """
    self._dataset = dataset._apply_options()  # pylint: disable=protected-access
    self._devices = devices
    self._source_device = source_device
    self._source_device_tensor = ops.convert_to_tensor(source_device)
    self._max_buffer_size = max_buffer_size
    self._prefetch_buffer_size = prefetch_buffer_size

    # Never let the host-side buffer be smaller than the prefetch buffer.
    if self._prefetch_buffer_size > self._max_buffer_size:
        self._max_buffer_size = self._prefetch_buffer_size

    # Create the MultiDeviceIterator.
    with ops.device(self._source_device):
        # TODO(b/121378567): Get rid of this shared_name hack.
        shared_name = ""
        if context.executing_eagerly():
            shared_name = context.shared_name()
        self._multi_device_iterator_resource = (
            gen_dataset_ops.multi_device_iterator(
                devices=self._devices,
                shared_name=shared_name,
                container="",
                **dataset_ops.flat_structure(self._dataset)))
        if context.executing_eagerly():
            # Delete the resource when this object is deleted
            self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
                handle=self._multi_device_iterator_resource,
                handle_device=self._source_device)

        # The incarnation ID is used to ensure consistency between the per-device
        # iterators and the multi-device iterator.
        self._incarnation_id = gen_dataset_ops.multi_device_iterator_init(
            self._dataset._variant_tensor,  # pylint: disable=protected-access
            self._multi_device_iterator_resource,
            max_buffer_size=self._max_buffer_size)

    # One prototype generator dataset per target device, built under that
    # device's scope.
    self._prototype_device_datasets = []
    for i, device in enumerate(self._devices):
        with ops.device(device):
            ds = _PerDeviceGenerator(i, self._multi_device_iterator_resource,
                                     self._incarnation_id,
                                     self._source_device_tensor,
                                     self._dataset._element_structure)  # pylint: disable=protected-access
            self._prototype_device_datasets.append(ds)

    # TODO(rohanj): Explore the possibility of the MultiDeviceIterator to
    # initialize the device side of the pipeline. This would allow the
    # MultiDeviceIterator to choose, for example, to move some transformations
    # into the device side from its input. It might be useful in rewriting.
    # Create the per device iterators.
    self._device_iterators = []
    for i, device in enumerate(self._devices):
        with ops.device(device):
            ds = self._create_device_dataset(i)
            if context.executing_eagerly():
                self._device_iterators.append(
                    dataset_ops.make_one_shot_iterator(ds))
            else:
                self._device_iterators.append(
                    dataset_ops.make_initializable_iterator(ds))

    # Only the non-eager path has per-iterator initializers to group.
    if not context.executing_eagerly():
        device_iterator_initializers = [
            iterator.initializer for iterator in self._device_iterators
        ]
        self._initializer = control_flow_ops.group(
            *device_iterator_initializers)
def minimize(self, global_step=None, name=None):
    """Add operations to train a linear model by minimizing the loss function.

    Sparse weight variables may be plain variables, lists of variables or
    `PartitionedVariable`s. For the latter two, the ids touched by the batch
    are routed to their partition (div partitioning), gathered per partition
    and stitched back into batch order before being fed to the solver.

    Args:
      global_step: Optional `Variable` to increment by one after the
        variables have been updated. `None` means "do not increment".
      name: Optional name for the returned operation.

    Returns:
      An Operation that updates the variables passed in the constructor. When
      `global_step` is given, it is the op of the `assign_add` on
      `global_step`, which runs after all weight updates.
    """
    # Technically, the op depends on a lot more than the variables,
    # but we'll keep the list short.
    with name_scope(name, 'sdca/minimize'):
        sparse_example_indices = []
        sparse_feature_indices = []
        sparse_features_values = []
        for sf in self._examples['sparse_features']:
            sparse_example_indices.append(sf.example_indices)
            sparse_feature_indices.append(sf.feature_indices)
            # If feature values are missing, sdca assumes a value of 1.0f.
            if sf.feature_values is not None:
                sparse_features_values.append(sf.feature_values)

        # pylint: disable=protected-access
        example_ids_hashed = gen_sdca_ops.sdca_fprint(
            internal_convert_to_tensor(self._examples['example_ids']))
        # pylint: enable=protected-access
        example_state_data = self._hashtable.lookup(example_ids_hashed)
        # Solver returns example_state_update, new delta sparse_feature_weights
        # and delta dense_feature_weights.

        sparse_weights = []
        sparse_indices = []
        # If we have partitioned variables, keep a few dictionaries of Tensors
        # around that we need for the assign_add after the op call to
        # gen_sdca_ops.sdca_optimizer(). These are keyed because we may have a
        # mix of partitioned and un-partitioned variables.
        num_partitions_by_var = {}
        p_assignments_by_var = {}
        gather_ids_by_var = {}
        for v_num, (w, i) in enumerate(
                zip(self._slots['unshrinked_sparse_features_weights'],
                    sparse_feature_indices)):
            # Append the sparse_indices (in full-variable space).
            sparse_idx = math_ops.cast(
                array_ops.unique(math_ops.cast(i, dtypes.int32))[0],
                dtypes.int64)
            sparse_indices.append(sparse_idx)
            if isinstance(w, (list, var_ops.PartitionedVariable)):
                num_partitions = len(w)
                flat_ids = array_ops.reshape(sparse_idx, [-1])
                # We use div partitioning, which is easiest to support downstream.
                # Compute num_total_ids as the sum of dim-0 of w, then assign
                # to partitions based on a constant number of ids per partition.
                # Optimize if we already know the full shape statically.
                dim_0_size = self._get_first_dimension_size_statically(
                    w, num_partitions)

                if dim_0_size.value:
                    num_total_ids = constant_op.constant(
                        dim_0_size.value, flat_ids.dtype)
                else:
                    dim_0_sizes = []
                    for p in range(num_partitions):
                        if w[p].get_shape()[0].value is not None:
                            dim_0_sizes.append(w[p].get_shape()[0].value)
                        else:
                            with ops.colocate_with(w[p]):
                                dim_0_sizes.append(array_ops.shape(w[p])[0])
                    num_total_ids = math_ops.reduce_sum(
                        math_ops.cast(array_ops.stack(dim_0_sizes),
                                      flat_ids.dtype))
                ids_per_partition = num_total_ids // num_partitions
                extras = num_total_ids % num_partitions

                p_assignments = math_ops.maximum(
                    flat_ids // (ids_per_partition + 1),
                    (flat_ids - extras) // ids_per_partition)

                # Emulate a conditional using a boolean indicator tensor
                new_ids = array_ops.where(
                    p_assignments < extras,
                    flat_ids % (ids_per_partition + 1),
                    (flat_ids - extras) % ids_per_partition)

                # Cast partition assignments to int32 for use in dynamic_partition.
                # There really should not be more than 2^32 partitions.
                p_assignments = math_ops.cast(p_assignments, dtypes.int32)
                # Partition list of ids based on assignments into num_partitions
                # separate lists.
                gather_ids = data_flow_ops.dynamic_partition(
                    new_ids, p_assignments, num_partitions)
                # Add these into the dictionaries for use in the later update.
                num_partitions_by_var[v_num] = num_partitions
                p_assignments_by_var[v_num] = p_assignments
                gather_ids_by_var[v_num] = gather_ids

                # Gather the weights from each partition.
                partition_gathered_weights = []
                for p in range(num_partitions):
                    with ops.colocate_with(w[p]):
                        partition_gathered_weights.append(
                            array_ops.gather(w[p], gather_ids[p]))

                # Stitch the weights back together in the same order they were before
                # we dynamic_partitioned them.
                condition_indices = data_flow_ops.dynamic_partition(
                    math_ops.range(array_ops.shape(new_ids)[0]),
                    p_assignments, num_partitions)
                batch_gathered_weights = data_flow_ops.dynamic_stitch(
                    condition_indices, partition_gathered_weights)
            else:
                w_as_tensor = internal_convert_to_tensor(w)
                with ops.device(w_as_tensor.device):
                    batch_gathered_weights = array_ops.gather(
                        w_as_tensor, sparse_idx)
            sparse_weights.append(batch_gathered_weights)

        # pylint: disable=protected-access
        if compat.forward_compatible(year=2018, month=10, day=30):
            esu, sfw, dfw = gen_sdca_ops.sdca_optimizer_v2(
                sparse_example_indices,
                sparse_feature_indices,
                sparse_features_values,
                self._convert_n_to_tensor(self._examples['dense_features']),
                internal_convert_to_tensor(self._examples['example_weights']),
                internal_convert_to_tensor(self._examples['example_labels']),
                sparse_indices,
                sparse_weights,
                self._convert_n_to_tensor(
                    self._slots['unshrinked_dense_features_weights']),
                example_state_data,
                loss_type=self._options['loss_type'],
                l1=self._options['symmetric_l1_regularization'],
                l2=self._symmetric_l2_regularization(),
                num_loss_partitions=self._num_loss_partitions(),
                num_inner_iterations=1,
                adaptive=self._adaptive())
        else:
            # NOTE(review): the legacy op is called with the keyword spelled
            # `adaptative` (the v2 call above uses `adaptive`). Left exactly
            # as-is -- confirm against the op definition before renaming.
            esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
                sparse_example_indices,
                sparse_feature_indices,
                sparse_features_values,
                self._convert_n_to_tensor(self._examples['dense_features']),
                internal_convert_to_tensor(self._examples['example_weights']),
                internal_convert_to_tensor(self._examples['example_labels']),
                sparse_indices,
                sparse_weights,
                self._convert_n_to_tensor(
                    self._slots['unshrinked_dense_features_weights']),
                example_state_data,
                loss_type=self._options['loss_type'],
                l1=self._options['symmetric_l1_regularization'],
                l2=self._symmetric_l2_regularization(),
                num_loss_partitions=self._num_loss_partitions(),
                num_inner_iterations=1,
                adaptative=self._adaptive())
        # pylint: enable=protected-access

        with ops.control_dependencies([esu]):
            update_ops = [self._hashtable.insert(example_ids_hashed, esu)]
            # Update the weights before the proximal step.
            for v_num, (w, i, u) in enumerate(
                    zip(self._slots['unshrinked_sparse_features_weights'],
                        sparse_indices, sfw)):
                if isinstance(w, (var_ops.PartitionedVariable, list)):
                    # Pass the values recorded for *this* variable rather than
                    # whatever the gather loop above left in its loop locals
                    # (those belong to the last partitioned variable seen).
                    update_ops += self._get_partitioned_update_ops(
                        v_num, num_partitions_by_var, p_assignments_by_var,
                        gather_ids_by_var, w, u,
                        p_assignments_by_var[v_num],
                        num_partitions_by_var[v_num])
                else:
                    update_ops.append(state_ops.scatter_add(w, i, u))
            for w, u in zip(
                    self._slots['unshrinked_dense_features_weights'], dfw):
                if isinstance(w, (var_ops.PartitionedVariable, list)):
                    # Split the dense update along dim 0 to match each
                    # partition's static first-dimension size.
                    split_updates = array_ops.split(
                        u,
                        num_or_size_splits=[
                            v.shape.as_list()[0] for v in w
                        ])
                    for v, split_update in zip(w, split_updates):
                        update_ops.append(
                            state_ops.assign_add(v, split_update))
                else:
                    update_ops.append(state_ops.assign_add(w, u))

        # Compare against None explicitly: truth-testing a Variable/Tensor is
        # not a reliable "was it provided" check and can raise in graph mode.
        if global_step is None:
            return control_flow_ops.group(*update_ops)
        with ops.control_dependencies(update_ops):
            return state_ops.assign_add(global_step, 1, name=name).op
def apply(self, var_list=None):
    """Creates shadow variables for `var_list` and an op that updates them.

    Every element of `var_list` gets a shadow (moving average) variable.
    For `Variable` objects the shadow starts from the variable's initial
    value and the variable is added to the
    `GraphKeys.MOVING_AVERAGE_VARIABLES` collection. For other tensors the
    shadow starts at zero and, when zero-debiasing is enabled on this
    object, is marked to be updated with `zero_debias=True`.

    `apply()` can be called multiple times with different lists of
    variables, but not twice for the same variable.

    Args:
      var_list: A list of Variable or Tensor objects of type float16,
        float32 or float64. Defaults to `variables.trainable_variables()`.

    Returns:
      An Operation that updates the moving averages.

    Raises:
      TypeError: If the arguments are not all float16, float32, or float64.
      ValueError: If the moving average of one of the variables is already
        being computed.
    """
    # TODO(touts): op_scope
    if var_list is None:
        var_list = variables.trainable_variables()

    supported_dtypes = (dtypes.float16, dtypes.float32, dtypes.float64)
    variable_op_types = ("Variable", "VariableV2")
    # Shadow variables whose update must use `zero_debias=True`.
    debiased_shadows = set()

    for var in var_list:
        if var.dtype.base_dtype not in supported_dtypes:
            raise TypeError(
                "The variables must be half, float, or double: %s" % var.name)
        if var in self._averages:
            raise ValueError(
                "Moving average already computed for: %s" % var.name)

        # For variables: to lower communication bandwidth across devices we keep
        # the moving averages on the same device as the variables. For other
        # tensors, we rely on the existing device allocation mechanism.
        with ops.control_dependencies(None):
            if not isinstance(var, variables.Variable):
                shadow = slot_creator.create_zeros_slot(
                    var,
                    self._name,
                    colocate_with_primary=(var.op.type in variable_op_types))
                if self._zero_debias:
                    debiased_shadows.add(shadow)
            else:
                shadow = slot_creator.create_slot(
                    var,
                    var.initialized_value(),
                    self._name,
                    colocate_with_primary=True)
                # NOTE(mrry): We only add `tf.Variable` objects to the
                # `MOVING_AVERAGE_VARIABLES` collection.
                ops.add_to_collection(
                    ops.GraphKeys.MOVING_AVERAGE_VARIABLES, var)
        self._averages[var] = shadow

    with ops.name_scope(self._name) as scope:
        decay = ops.convert_to_tensor(self._decay, name="decay")
        if self._num_updates is not None:
            # Cap the decay by (1 + n) / (10 + n), n being the update count.
            num_updates = math_ops.cast(
                self._num_updates, dtypes.float32, name="num_updates")
            decay_cap = (1.0 + num_updates) / (10.0 + num_updates)
            decay = math_ops.minimum(decay, decay_cap)
        updates = [
            assign_moving_average(
                self._averages[var],
                var,
                decay,
                zero_debias=self._averages[var] in debiased_shadows)
            for var in var_list
        ]
        return control_flow_ops.group(*updates, name=scope)
def _mini_batch_training_op(self, inputs, cluster_idx_list, cluster_centers, total_counts):
    """Creates an op for training for mini batch case.

    For every (input, cluster index) pair, the per-cluster counts and the
    cluster centers are updated with scatter-adds; all of these updates are
    grouped into the returned op.

    Args:
      inputs: list of input Tensors.
      cluster_idx_list: A vector (or list of vectors). Each element in the
        vector corresponds to an input row in the matching element of
        `inputs` and specifies the cluster id corresponding to the input.
      cluster_centers: Tensor Ref of cluster centers.
      total_counts: Tensor Ref of cluster counts. Must not be None (checked
        with an `assert`).

    Returns:
      An op for doing an update of mini-batch k-means.
    """
    update_ops = []
    for inp, cluster_idx in zip(inputs, cluster_idx_list):
        with ops.colocate_with(inp, ignore_existing=True):
            assert total_counts is not None
            cluster_idx = array_ops.reshape(cluster_idx, [-1])
            # Dedupe the unique ids of cluster_centers being updated so that updates
            # can be locally aggregated.
            unique_ids, unique_idx = array_ops.unique(cluster_idx)
            num_unique_cluster_idx = array_ops.size(unique_ids)
            # Fetch the old values of counts and cluster_centers.
            with ops.colocate_with(total_counts, ignore_existing=True):
                old_counts = array_ops.gather(total_counts, unique_ids)
            # TODO(agarwal): This colocation seems to run into problems. Fix it.
            with ops.colocate_with(cluster_centers, ignore_existing=True):
                old_cluster_centers = array_ops.gather(cluster_centers, unique_ids)
            # Locally aggregate the increment to counts.
            count_updates = math_ops.unsorted_segment_sum(
                array_ops.ones_like(unique_idx, dtype=total_counts.dtype),
                unique_idx, num_unique_cluster_idx)
            # Locally compute the sum of inputs mapped to each id.
            # For a cluster with old cluster value x, old count n, and with data
            # d_1,...d_k newly assigned to it, we recompute the new value as
            # \\(x += (sum_i(d_i) - k * x) / (n + k)\\).
            # Compute \\(sum_i(d_i)\\), see comment above.
            cluster_center_updates = math_ops.unsorted_segment_sum(
                inp, unique_idx, num_unique_cluster_idx)
            # Shape to enable broadcasting count_updates and learning_rate to inp.
            # It extends the shape with 1's to match the rank of inp.
            broadcast_shape = array_ops.concat([
                array_ops.reshape(num_unique_cluster_idx, [1]),
                array_ops.ones(
                    array_ops.reshape(array_ops.rank(inp) - 1, [1]),
                    dtype=dtypes.int32)
            ], 0)
            # Subtract k * x, see comment above.
            cluster_center_updates -= math_ops.cast(
                array_ops.reshape(count_updates, broadcast_shape),
                inp.dtype) * old_cluster_centers
            learning_rate = math_ops.reciprocal(
                math_ops.cast(old_counts + count_updates, inp.dtype))
            learning_rate = array_ops.reshape(learning_rate, broadcast_shape)
            # scale by 1 / (n + k), see comment above.
            cluster_center_updates *= learning_rate
            # Apply the updates.
        # NOTE(review): the scatter-adds are placed outside the `inp`
        # colocation scope here; the flattened source does not make the
        # original nesting explicit -- confirm.
        update_counts = state_ops.scatter_add(total_counts, unique_ids,
                                              count_updates)
        update_cluster_centers = state_ops.scatter_add(
            cluster_centers, unique_ids, cluster_center_updates)
        update_ops.extend([update_counts, update_cluster_centers])
    return control_flow_ops.group(*update_ops)
def minimize(self, global_step=None, name=None):
    """Add operations to train a linear model by minimizing the loss function.

    Args:
      global_step: Optional `Variable` to increment by one after the
        variables have been updated. `None` means "do not increment".
      name: Optional name for the returned operation.

    Returns:
      An Operation that updates the variables passed in the constructor. When
      `global_step` is given, it is the op of the `assign_add` on
      `global_step`, which runs after all weight updates.
    """
    # Technically, the op depends on a lot more than the variables,
    # but we'll keep the list short.
    with name_scope(name, 'sdca/minimize'):
        sparse_example_indices = []
        sparse_feature_indices = []
        sparse_features_values = []
        for sf in self._examples['sparse_features']:
            sparse_example_indices.append(sf.example_indices)
            sparse_feature_indices.append(sf.feature_indices)
            # If feature values are missing, sdca assumes a value of 1.0f.
            if sf.feature_values is not None:
                sparse_features_values.append(sf.feature_values)

        # pylint: disable=protected-access
        example_ids_hashed = gen_sdca_ops.sdca_fprint(
            internal_convert_to_tensor(self._examples['example_ids']))
        # pylint: enable=protected-access
        example_state_data = self._hashtable.lookup(example_ids_hashed)
        # Solver returns example_state_update, new delta sparse_feature_weights
        # and delta dense_feature_weights.

        weights_tensor = self._convert_n_to_tensor(
            self._slots['unshrinked_sparse_features_weights'])
        sparse_weights = []
        sparse_indices = []
        for w, i in zip(weights_tensor, sparse_feature_indices):
            # Find the feature ids to lookup in the variables.
            with ops.device(w.device):
                sparse_indices.append(
                    math_ops.cast(
                        array_ops.unique(math_ops.cast(i, dtypes.int32))[0],
                        dtypes.int64))
                # Gather only the weights for the ids just appended.
                sparse_weights.append(
                    array_ops.gather(w, sparse_indices[-1]))

        # pylint: disable=protected-access
        esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
            sparse_example_indices,
            sparse_feature_indices,
            sparse_features_values,
            self._convert_n_to_tensor(self._examples['dense_features']),
            internal_convert_to_tensor(self._examples['example_weights']),
            internal_convert_to_tensor(self._examples['example_labels']),
            sparse_indices,
            sparse_weights,
            self._convert_n_to_tensor(
                self._slots['unshrinked_dense_features_weights']),
            example_state_data,
            loss_type=self._options['loss_type'],
            l1=self._options['symmetric_l1_regularization'],
            l2=self._symmetric_l2_regularization(),
            num_loss_partitions=self._num_loss_partitions(),
            num_inner_iterations=1)
        # pylint: enable=protected-access

        with ops.control_dependencies([esu]):
            update_ops = [self._hashtable.insert(example_ids_hashed, esu)]
            # Update the weights before the proximal step.
            for w, i, u in zip(
                    self._slots['unshrinked_sparse_features_weights'],
                    sparse_indices, sfw):
                update_ops.append(state_ops.scatter_add(w, i, u))
            for w, u in zip(
                    self._slots['unshrinked_dense_features_weights'], dfw):
                update_ops.append(w.assign_add(u))

        # Compare against None explicitly: truth-testing a Variable/Tensor is
        # not a reliable "was it provided" check and can raise in graph mode.
        if global_step is None:
            return control_flow_ops.group(*update_ops)
        with ops.control_dependencies(update_ops):
            return state_ops.assign_add(global_step, 1, name=name).op
def _experimental_run_steps_on_iterator(self, fn, multi_worker_iterator, iterations, initial_loop_values=None):
    """Builds ops that run `fn` on every replica and collects the outputs.

    `fn` is wrapped into a per-replica step (`run_fn`) that is replicated
    with `tpu.replicate`. When `self.steps_per_run == 1` the replicated step
    is built once; otherwise it is passed to `training_loop.repeat` together
    with `iterations` and the flattened, per-replica-tiled
    `initial_loop_values`.

    Args:
      fn: Callable invoked as `fn(ctx, inputs)`, where `ctx` is the
        `MultiStepContext` created here.
      multi_worker_iterator: Iterator whose `get_next()` yields the
        per-replica inputs.
      iterations: Passed to `training_loop.repeat` when `steps_per_run` is
        not 1.
      initial_loop_values: Optional structure of initial loop values;
        defaults to an empty dict.

    Returns:
      The `MultiStepContext`, with `run_op` set to a group over the
      replicated outputs and the last step outputs set to the tensor outputs
      regrouped by output (one list of per-replica values per output).
    """
    # Wrap `fn` for repeat.
    if initial_loop_values is None:
        initial_loop_values = {}
    initial_loop_values = nest.flatten(initial_loop_values)
    ctx = input_lib.MultiStepContext()

    def run_fn(inputs):
        """Single step on the TPU device."""
        fn_result = fn(ctx, inputs)
        flat_last_step_outputs = nest.flatten(ctx.last_step_outputs)
        if flat_last_step_outputs:
            # Make the returned tensors depend on the step having run.
            with ops.control_dependencies([fn_result]):
                return [
                    array_ops.identity(f) for f in flat_last_step_outputs
                ]
        else:
            return fn_result

    # We capture the control_flow_context at this point, before we run `fn`
    # inside a while_loop and TPU replicate context. This is useful in cases
    # where we might need to exit these contexts and get back to the outer
    # context to do some things, for e.g. create an op which should be
    # evaluated only once at the end of the loop on the host. One such usage
    # is in creating metrics' value op.
    self._outer_control_flow_context = (
        ops.get_default_graph()._get_control_flow_context())  # pylint: disable=protected-access

    def rewrite_fn(*args):
        """The rewritten step fn running on TPU."""
        del args

        per_replica_inputs = multi_worker_iterator.get_next()
        replicate_inputs = []
        for replica_id in range(self._num_replicas_in_sync):
            # The lambda is consumed within the same iteration, so capturing
            # the loop variable is safe here.
            select_replica = lambda x: values.select_replica(replica_id, x)  # pylint: disable=cell-var-from-loop
            replicate_inputs.append(
                (nest.map_structure(select_replica, per_replica_inputs), ))

        replicate_outputs = tpu.replicate(
            run_fn,
            replicate_inputs,
            device_assignment=self._device_assignment)

        # If run_fn has tensor outputs, tpu.replicate returns a list of list. We
        # will flatten it in this case. If run_fn has no tensor outputs,
        # tpu.replicate returns a list of no_ops, we will keep the output as it
        # is.
        if isinstance(replicate_outputs[0], list):
            replicate_outputs = nest.flatten(replicate_outputs)

        return replicate_outputs

    # TODO(sourabhbajaj): The input to while loop should be based on the
    # output type of the step_fn
    assert isinstance(initial_loop_values, list)
    # One copy of the flattened initial values per replica.
    initial_loop_values = initial_loop_values * self._num_replicas_in_sync

    # Put the while loop op on TPU host 0.
    with ops.device(self._host_device):
        if self.steps_per_run == 1:
            replicate_outputs = rewrite_fn()
        else:
            replicate_outputs = training_loop.repeat(
                iterations, rewrite_fn, initial_loop_values)

    # The captured outer context is only needed while the step is built.
    del self._outer_control_flow_context
    ctx.run_op = control_flow_ops.group(replicate_outputs)

    if isinstance(replicate_outputs, list):
        # Filter out any ops from the outputs, typically this would be the case
        # when there were no tensor outputs.
        last_step_tensor_outputs = [
            x for x in replicate_outputs if not isinstance(x, ops.Operation)
        ]

        # Outputs are currently of the structure (flattened)
        # [output0_device0, output1_device0, output2_device0,
        #  output0_device1, output1_device1, output2_device1,
        #  ...]
        # Convert this to the following structure instead: (grouped by output)
        # [[output0_device0, output0_device1],
        #  [output1_device0, output1_device1],
        #  [output2_device0, output2_device1]]
        output_num = len(
            last_step_tensor_outputs) // self._num_replicas_in_sync
        last_step_tensor_outputs = [
            last_step_tensor_outputs[i::output_num]
            for i in range(output_num)
        ]
    else:
        # no tensors returned.
        last_step_tensor_outputs = []

    _set_last_step_outputs(ctx, last_step_tensor_outputs)
    return ctx
def init_ops(self):
    """Returns a single op grouping every op in `self._init_ops`."""
    init_ops = self._init_ops
    return control_flow_ops.group(*init_ops)
def training_graph(self, input_data, input_labels, random_seed, data_spec,
                   input_weights=None):
    """Constructs a TF graph for training a random tree.

    The graph is assembled in stages: accumulate statistics for the batch,
    sample candidate splits, determine finished/stale nodes, score the
    non-fertile leaves, pick best splits, grow the tree, reassign fertile
    slots, and finally reset the accumulators that were cleared or newly
    allocated. Ordering between stages is enforced with
    `ops.control_dependencies`.

    Args:
      input_data: A tensor or SparseTensor or placeholder for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      random_seed: The random number generator seed to use for this tree.  0
        means use the current time as the seed.
      data_spec: A list of tf.dtype values specifying the original types of
        each column.
      input_weights: A float tensor or placeholder holding per-input weights,
        or None if all inputs are to be weighted equally.

    Returns:
      The last op in the random tree training graph: a
      `control_flow_ops.group` over every update op built here plus the ops
      returned by `self.finish_iteration()`.
    """
    epoch = math_ops.to_int32(get_epoch_variable())

    # An empty list is passed to the ops when no per-input weights are given.
    if input_weights is None:
        input_weights = []

    # For sparse input the sparse components are passed separately and the
    # dense `input_data` argument is replaced by an empty list.
    sparse_indices = []
    sparse_values = []
    sparse_shape = []
    if isinstance(input_data, sparse_tensor.SparseTensor):
        sparse_indices = input_data.indices
        sparse_values = input_data.values
        sparse_shape = input_data.dense_shape
        input_data = []

    # Count extremely random stats.
    (node_sums, node_squares, splits_indices, splits_sums, splits_squares,
     totals_indices, totals_sums, totals_squares,
     input_leaves) = (tensor_forest_ops.count_extremely_random_stats(
         input_data,
         sparse_indices,
         sparse_values,
         sparse_shape,
         data_spec,
         input_labels,
         input_weights,
         self.variables.tree,
         self.variables.tree_thresholds,
         self.variables.node_to_accumulator_map,
         self.variables.candidate_split_features,
         self.variables.candidate_split_thresholds,
         self.variables.start_epoch,
         epoch,
         num_classes=self.params.num_output_columns,
         regression=self.params.regression))

    # Ops that fold this batch's per-node statistics into the node variables.
    node_update_ops = []
    node_update_ops.append(
        state_ops.assign_add(self.variables.node_sums, node_sums))

    # Ops that fold this batch's statistics into the candidate-split and
    # accumulator variables.
    splits_update_ops = []
    splits_update_ops.append(
        tensor_forest_ops.scatter_add_ndim(self.variables.candidate_split_sums,
                                           splits_indices, splits_sums))
    splits_update_ops.append(
        tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_sums,
                                           totals_indices, totals_sums))

    # The squared statistics are only updated in regression mode.
    if self.params.regression:
        node_update_ops.append(state_ops.assign_add(self.variables.node_squares,
                                                    node_squares))
        splits_update_ops.append(
            tensor_forest_ops.scatter_add_ndim(
                self.variables.candidate_split_squares,
                splits_indices, splits_squares))
        splits_update_ops.append(
            tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_squares,
                                               totals_indices, totals_squares))

    # Sample inputs.
    update_indices, feature_updates, threshold_updates = (
        tensor_forest_ops.sample_inputs(
            input_data,
            sparse_indices,
            sparse_values,
            sparse_shape,
            input_weights,
            self.variables.node_to_accumulator_map,
            input_leaves,
            self.variables.candidate_split_features,
            self.variables.candidate_split_thresholds,
            split_initializations_per_input=(
                self.params.split_initializations_per_input),
            split_sampling_random_seed=random_seed))

    update_features_op = state_ops.scatter_update(
        self.variables.candidate_split_features, update_indices,
        feature_updates)
    update_thresholds_op = state_ops.scatter_update(
        self.variables.candidate_split_thresholds, update_indices,
        threshold_updates)

    # Calculate finished nodes.
    with ops.control_dependencies(splits_update_ops):
        # Passing input_leaves to finished nodes here means that nodes that
        # have become stale won't be deallocated until an input reaches them,
        # because we're trying to avoid considering every fertile node for
        # performance reasons.
        finished, stale = tensor_forest_ops.finished_nodes(
            input_leaves,
            self.variables.node_to_accumulator_map,
            self.variables.candidate_split_sums,
            self.variables.candidate_split_squares,
            self.variables.accumulator_sums,
            self.variables.accumulator_squares,
            self.variables.start_epoch,
            epoch,
            num_split_after_samples=self.params.split_after_samples,
            min_split_samples=self.params.min_split_samples,
            dominate_method=self.params.dominate_method,
            dominate_fraction=self.params.dominate_fraction)

    # Update leaf scores.
    # TODO(thomaswc): Store the leaf scores in a TopN and only update the
    # scores of the leaves that were touched by this batch of input.
    # Column 0 of the tree variable is compared against LEAF_NODE to find the
    # leaves.
    children = array_ops.squeeze(
        array_ops.slice(self.variables.tree, [0, 0], [-1, 1]),
        squeeze_dims=[1])
    is_leaf = math_ops.equal(constants.LEAF_NODE, children)
    leaves = math_ops.to_int32(
        array_ops.squeeze(
            array_ops.where(is_leaf), squeeze_dims=[1]))
    # Non-fertile leaves are those whose node_to_accumulator_map entry is
    # negative.
    non_fertile_leaves = array_ops.boolean_mask(
        leaves, math_ops.less(array_ops.gather(
            self.variables.node_to_accumulator_map, leaves), 0))

    # TODO(gilberth): It should be possible to limit the number of non
    # fertile leaves we calculate scores for, especially since we can only take
    # at most array_ops.shape(finished)[0] of them.
    with ops.control_dependencies(node_update_ops):
        sums = array_ops.gather(self.variables.node_sums, non_fertile_leaves)
        if self.params.regression:
            squares = array_ops.gather(self.variables.node_squares,
                                       non_fertile_leaves)
            non_fertile_leaf_scores = self._variance(sums, squares)
        else:
            non_fertile_leaf_scores = self._weighted_gini(sums)

    # Calculate best splits.
    with ops.control_dependencies(splits_update_ops):
        split_indices = tensor_forest_ops.best_splits(
            finished,
            self.variables.node_to_accumulator_map,
            self.variables.candidate_split_sums,
            self.variables.candidate_split_squares,
            self.variables.accumulator_sums,
            self.variables.accumulator_squares,
            regression=self.params.regression)

    # Grow tree.
    with ops.control_dependencies([update_features_op, update_thresholds_op]):
        (tree_update_indices, tree_children_updates, tree_threshold_updates,
         new_eot) = (tensor_forest_ops.grow_tree(
             self.variables.end_of_tree,
             self.variables.node_to_accumulator_map,
             finished,
             split_indices,
             self.variables.candidate_split_features,
             self.variables.candidate_split_thresholds))
        tree_update_op = state_ops.scatter_update(
            self.variables.tree, tree_update_indices, tree_children_updates)
        thresholds_update_op = state_ops.scatter_update(
            self.variables.tree_thresholds, tree_update_indices,
            tree_threshold_updates)
        # TODO(thomaswc): Only update the epoch on the new leaves.
        new_epoch_updates = epoch * array_ops.ones_like(tree_threshold_updates,
                                                        dtype=dtypes.int32)
        epoch_update_op = state_ops.scatter_update(
            self.variables.start_epoch, tree_update_indices,
            new_epoch_updates)

    # Update fertile slots.
    with ops.control_dependencies([tree_update_op]):
        (n2a_map_updates, a2n_map_updates, accumulators_cleared,
         accumulators_allocated) = (tensor_forest_ops.update_fertile_slots(
             finished,
             non_fertile_leaves,
             non_fertile_leaf_scores,
             self.variables.end_of_tree,
             self.variables.accumulator_sums,
             self.variables.node_to_accumulator_map,
             stale,
             self.variables.node_sums,
             regression=self.params.regression))

    # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has
    # used it to calculate new leaves.
    gated_new_eot, = control_flow_ops.tuple(
        [new_eot], control_inputs=[n2a_map_updates])
    eot_update_op = state_ops.assign(self.variables.end_of_tree, gated_new_eot)

    # Collect every op that must run for one training step.
    updates = []
    updates.append(eot_update_op)
    updates.append(tree_update_op)
    updates.append(thresholds_update_op)
    updates.append(epoch_update_op)

    # Row 0 of each map update is used as the scatter indices and row 1 as
    # the values written.
    updates.append(
        state_ops.scatter_update(self.variables.node_to_accumulator_map,
                                 n2a_map_updates[0], n2a_map_updates[1]))

    updates.append(
        state_ops.scatter_update(self.variables.accumulator_to_node_map,
                                 a2n_map_updates[0], a2n_map_updates[1]))

    cleared_and_allocated_accumulators = array_ops.concat_v2(
        [accumulators_cleared, accumulators_allocated], 0)

    # Calculate values to put into scatter update for candidate counts.
    # Candidate split counts are always reset back to 0 for both cleared
    # and allocated accumulators. This means some accumulators might be doubly
    # reset to 0 if they were released and not allocated, then later allocated.
    split_values = array_ops.tile(
        array_ops.expand_dims(array_ops.expand_dims(
            array_ops.zeros_like(cleared_and_allocated_accumulators,
                                 dtype=dtypes.float32), 1), 2),
        [1, self.params.num_splits_to_consider,
         self.params.num_output_columns])
    updates.append(state_ops.scatter_update(
        self.variables.candidate_split_sums,
        cleared_and_allocated_accumulators, split_values))
    if self.params.regression:
        updates.append(state_ops.scatter_update(
            self.variables.candidate_split_squares,
            cleared_and_allocated_accumulators, split_values))

    # Calculate values to put into scatter update for total counts.
    # Cleared accumulators are written as -1 and newly allocated ones as 0.
    total_cleared = array_ops.tile(
        array_ops.expand_dims(
            math_ops.neg(array_ops.ones_like(accumulators_cleared,
                                             dtype=dtypes.float32)), 1),
        [1, self.params.num_output_columns])
    total_reset = array_ops.tile(
        array_ops.expand_dims(
            array_ops.zeros_like(accumulators_allocated,
                                 dtype=dtypes.float32), 1),
        [1, self.params.num_output_columns])
    accumulator_updates = array_ops.concat_v2([total_cleared, total_reset], 0)
    updates.append(state_ops.scatter_update(
        self.variables.accumulator_sums,
        cleared_and_allocated_accumulators, accumulator_updates))
    if self.params.regression:
        updates.append(state_ops.scatter_update(
            self.variables.accumulator_squares,
            cleared_and_allocated_accumulators, accumulator_updates))

    # Calculate values to put into scatter update for candidate splits.
    # Every candidate split feature of the affected accumulators is set to -1.
    split_features_updates = array_ops.tile(
        array_ops.expand_dims(
            math_ops.neg(array_ops.ones_like(
                cleared_and_allocated_accumulators)), 1),
        [1, self.params.num_splits_to_consider])
    updates.append(state_ops.scatter_update(
        self.variables.candidate_split_features,
        cleared_and_allocated_accumulators, split_features_updates))

    updates += self.finish_iteration()

    return control_flow_ops.group(*updates)
def body(i):
    """Add `v` into `u` and return `i + 1` gated on that update.

    `u` and `v` are free variables resolved outside this function.

    Args:
        i: Current loop counter.

    Returns:
        A single-element list holding the incremented counter, which carries
        a dependency on the grouped `assign_add` op.
    """
    updated_u = state_ops.assign_add(u, v)
    next_i = math_ops.add(i, 1)
    update_group = control_flow_ops.group(updated_u)
    return [control_flow_ops.with_dependencies([update_group], next_i)]
def evaluate_once(checkpoint_path,
                  logdir,
                  master='',
                  num_evals=1,
                  eval_op=None,
                  eval_op_feed_dict=None,
                  final_op=None,
                  final_op_feed_dict=None,
                  summary_op=_USE_DEFAULT,
                  summary_op_feed_dict=None,
                  variables_to_restore=None,
                  session_config=None):
    """Evaluates the model once, using the checkpoint at `checkpoint_path`.

    Variables are restored from the checkpoint inside a supervisor-managed
    session, queue runners are started, and the work is delegated to
    `evaluation`.

    Args:
        checkpoint_path: The path to a checkpoint to use for evaluation.
        logdir: The directory where the TensorFlow summaries are written to.
        master: The BNS address of the TensorFlow master.
        num_evals: The number of times to run `eval_op`.
        eval_op: An operation run `num_evals` times.
        eval_op_feed_dict: The feed dictionary to use when executing the
            `eval_op`.
        final_op: An operation to execute after all of the `eval_op`
            executions. The value of `final_op` is returned.
        final_op_feed_dict: A feed dictionary to use when executing
            `final_op`.
        summary_op: The summary op to evaluate after running the metric ops.
            When left at its default sentinel, it is replaced by
            `logging_ops.merge_all_summaries()`.
        summary_op_feed_dict: An optional feed dictionary to use when running
            the `summary_op`.
        variables_to_restore: The variables to restore during evaluation. If
            left as `None` (or otherwise falsy),
            `variables.get_variables_to_restore()` is used.
        session_config: An instance of `tf.ConfigProto` that will be used to
            configure the `Session`. If left as `None`, the default will be
            used.

    Returns:
        Whatever `evaluation` returns: the value of `final_op`, or `None` if
        `final_op` is `None`.
    """
    if summary_op == _USE_DEFAULT:
        summary_op = logging_ops.merge_all_summaries()

    global_step = variables.get_or_create_global_step()

    init_op = control_flow_ops.group(
        tf_variables.initialize_all_variables(),
        tf_variables.initialize_local_variables(),
        data_flow_ops.initialize_all_tables())

    restore_vars = variables_to_restore
    if not restore_vars:
        restore_vars = variables.get_variables_to_restore()
    saver = tf_saver.Saver(restore_vars)

    summary_writer = summary_io.SummaryWriter(logdir)

    # The supervisor only provides initialization and session management
    # here; saving and summaries are handled explicitly in this function.
    eval_supervisor = supervisor.Supervisor(
        graph=ops.get_default_graph(),
        logdir=logdir,
        init_op=init_op,
        summary_op=None,
        summary_writer=None,
        global_step=None,
        saver=None)

    time_format = '%Y-%m-%d-%H:%M:%S'
    evaluation_kwargs = dict(
        num_evals=num_evals,
        eval_op=eval_op,
        eval_op_feed_dict=eval_op_feed_dict,
        final_op=final_op,
        final_op_feed_dict=final_op_feed_dict,
        summary_op=summary_op,
        summary_op_feed_dict=summary_op_feed_dict,
        summary_writer=summary_writer,
        global_step=global_step)

    logging.info('Starting evaluation at ' +
                 time.strftime(time_format, time.gmtime()))

    with eval_supervisor.managed_session(
            master, start_standard_services=False,
            config=session_config) as sess:
        saver.restore(sess, checkpoint_path)
        eval_supervisor.start_queue_runners(sess)
        result = evaluation(sess, **evaluation_kwargs)

    logging.info('Finished evaluation at ' +
                 time.strftime(time_format, time.gmtime()))

    return result