def _testMoments(self, dt): try: from scipy import stats # pylint: disable=g-import-not-at-top except ImportError as e: tf_logging.warn("Cannot test moments: %s" % e) return # The moments test is a z-value test. This is the largest z-value # we want to tolerate. Since the z-test approximates a unit normal # distribution, it should almost definitely never exceed 6. z_limit = 6.0 for stride in 0, 1, 4, 17: alphas = [0.2, 1.0, 3.0] if dt == dtypes.float64: alphas = [0.01] + alphas for alpha in alphas: for scale in 9, 17: # Gamma moments only defined for values less than the scale param. max_moment = min(6, scale // 2) sampler = self._Sampler( 20000, alpha, 1 / scale, dt, use_gpu=False, seed=12345) z_scores = util.test_moment_matching( sampler(), max_moment, stats.gamma(alpha, scale=scale), stride=stride, ) self.assertAllLess(z_scores, z_limit)
def surrogate_loss(sample_losses, stochastic_tensors=None, name="SurrogateLoss"): """Surrogate loss for stochastic graphs. This function will call `loss_fn` on each `StochasticTensor` upstream of `sample_losses`, passing the losses that it influenced. Note that currently `surrogate_loss` does not work with `StochasticTensor`s instantiated in `while_loop`s or other control structures. Args: sample_losses: a list or tuple of final losses. Each loss should be per example in the batch (and possibly per sample); that is, it should have dimensionality of 1 or greater. All losses should have the same shape. stochastic_tensors: a list of `StochasticTensor`s to add loss terms for. If None, defaults to all `StochasticTensor`s in the graph upstream of the `Tensor`s in `sample_losses`. name: the name with which to prepend created ops. Returns: `Tensor` loss, which is the sum of `sample_losses` and the `loss_fn`s returned by the `StochasticTensor`s. Raises: TypeError: if `sample_losses` is not a list or tuple, or if its elements are not `Tensor`s. ValueError: if any loss in `sample_losses` does not have dimensionality 1 or greater. """ with ops.op_scope(sample_losses, name): fixed_losses = [] if not isinstance(sample_losses, (list, tuple)): raise TypeError("sample_losses must be a list or tuple") for loss in sample_losses: if not isinstance(loss, ops.Tensor): raise TypeError("loss is not a Tensor: %s" % loss) ndims = loss.get_shape().ndims if not (ndims is not None and ndims >= 1): raise ValueError("loss must have dimensionality 1 or greater: %s" % loss) fixed_losses.append(array_ops.stop_gradient(loss)) stoch_dependencies_map = _stochastic_dependencies_map( fixed_losses, stochastic_tensors=stochastic_tensors) if not stoch_dependencies_map: logging.warn( "No collection of Stochastic Tensors found for current graph.") return math_ops.add_n(sample_losses) # Iterate through all of the stochastic dependencies, adding # surrogate terms where necessary. sample_losses = [ops.convert_to_tensor(loss) for loss in sample_losses] loss_terms = sample_losses for (stoch_node, dependent_losses) in stoch_dependencies_map.items(): loss_term = stoch_node.loss(list(dependent_losses)) if loss_term is not None: loss_terms.append(loss_term) return math_ops.add_n(loss_terms)
def validateKolmogorovSmirnov(self, shape, mean, stddev, minval, maxval, seed=1618): try: import scipy.stats # pylint: disable=g-import-not-at-top random_seed.set_random_seed(seed) with self.test_session(use_gpu=True): samples = random_ops.parameterized_truncated_normal(shape, mean, stddev, minval, maxval).eval() assert (~np.isnan(samples)).all() minval = max(mean - stddev * 10, minval) maxval = min(mean + stddev * 10, maxval) dist = scipy.stats.norm(loc=mean, scale=stddev) cdf_min = dist.cdf(minval) cdf_max = dist.cdf(maxval) def truncated_cdf(x): return np.clip((dist.cdf(x) - cdf_min) / (cdf_max - cdf_min), 0.0, 1.0) pvalue = scipy.stats.kstest(samples, truncated_cdf)[1] self.assertGreater(pvalue, 1e-10) except ImportError as e: tf_logging.warn("Cannot test truncated normal op: %s" % str(e))
def _write_dict_to_summary(output_dir, dictionary, current_global_step): """Writes a `dict` into summary file in given output directory. Args: output_dir: `str`, directory to write the summary file in. dictionary: the `dict` to be written to summary file. current_global_step: `int`, the current global step. """ logging.info('Saving dict for global step %d: %s', current_global_step, dict_to_str(dictionary)) summary_writer = summary_io.SummaryWriterCache.get(output_dir) summary_proto = summary_pb2.Summary() for key in dictionary: if dictionary[key] is None or key == tf.GraphKeys.GLOBAL_STEP: continue value = summary_proto.value.add() value.tag = key if isinstance(dictionary[key], (np.float32, float)): value.simple_value = float(dictionary[key]) elif isinstance(dictionary[key], (int, np.int64, np.int32)): value.simple_value = int(dictionary[key]) else: logging.warn('Skipping summary for %s, must be a ' 'float, np.float32, int, int32, or int64.', key) summary_writer.add_summary(summary_proto, current_global_step) summary_writer.flush()
def _write_dict_to_summary(output_dir, dictionary, current_global_step): """Writes a `dict` into summary file in given output directory. Args: output_dir: `str`, directory to write the summary file in. dictionary: the `dict` to be written to summary file. current_global_step: `int`, the current global step. """ logging.info('Saving dict for global step %d: %s', current_global_step, _dict_to_str(dictionary)) summary_writer = writer_cache.FileWriterCache.get(output_dir) summary_proto = summary_pb2.Summary() for key in dictionary: if dictionary[key] is None: continue if key == 'global_step': continue value = summary_proto.value.add() value.tag = key if (isinstance(dictionary[key], np.float32) or isinstance(dictionary[key], float)): value.simple_value = float(dictionary[key]) elif (isinstance(dictionary[key], np.int64) or isinstance(dictionary[key], np.int32) or isinstance(dictionary[key], int)): value.simple_value = int(dictionary[key]) else: logging.warn( 'Skipping summary for %s, must be a float, np.float32, np.int64, ' 'np.int32 or int.', key) summary_writer.add_summary(summary_proto, current_global_step) summary_writer.flush()
def _log_signature_report(signature_def_map, excluded_signatures): """Log a report of which signatures were produced.""" sig_names_by_method_name = collections.defaultdict(list) # We'll collect whatever method_names are present, but also we want to make # sure to output a line for each of the three standard methods even if they # have no signatures. for method_name in _FRIENDLY_METHOD_NAMES: sig_names_by_method_name[method_name] = [] for signature_name, sig in signature_def_map.items(): sig_names_by_method_name[sig.method_name].append(signature_name) # TODO(b/67733540): consider printing the full signatures, not just names for method_name, sig_names in sig_names_by_method_name.items(): if method_name in _FRIENDLY_METHOD_NAMES: method_name = _FRIENDLY_METHOD_NAMES[method_name] logging.info('Signatures INCLUDED in export for {}: {}'.format( method_name, sig_names if sig_names else 'None')) if excluded_signatures: logging.info('Signatures EXCLUDED from export because they cannot be ' 'be served via TensorFlow Serving APIs:') for signature_name, message in excluded_signatures.items(): logging.info('\'{}\' : {}'.format(signature_name, message)) if not signature_def_map: logging.warn('Export includes no signatures!') elif (signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY not in signature_def_map): logging.warn('Export includes no default signature!')
def __init__(self, num_units, forget_bias=1.0, state_is_tuple=True, activation=None, reuse=None, name=None): """Initialize the basic LSTM cell. Args: num_units: int, The number of units in the LSTM cell. forget_bias: float, The bias added to forget gates (see above). Must set to `0.0` manually when restoring from CudnnLSTM-trained checkpoints. state_is_tuple: If True, accepted and returned states are 2-tuples of the `c_state` and `m_state`. If False, they are concatenated along the column axis. The latter behavior will soon be deprecated. activation: Activation function of the inner states. Default: `tanh`. reuse: (optional) Python boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. name: String, the name of the layer. Layers with the same name will share weights, but to avoid mistakes we require reuse=True in such cases. When restoring from CudnnLSTM-trained checkpoints, must use `CudnnCompatibleLSTMCell` instead. """ super(BasicLSTMCell, self).__init__(_reuse=reuse, name=name) if not state_is_tuple: logging.warn("%s: Using a concatenated state is slower and will soon be " "deprecated. Use state_is_tuple=True.", self) # Inputs must be 2-dimensional. self.input_spec = base_layer.InputSpec(ndim=2) self._num_units = num_units self._forget_bias = forget_bias self._state_is_tuple = state_is_tuple self._activation = activation or math_ops.tanh
def get_timestamped_dir(dir_base): """Builds a path to a new subdirectory within the base directory. The subdirectory will be named using the current time. This guarantees monotonically increasing directory numbers even across multiple runs of the pipeline. The timestamp used is the number of seconds since epoch UTC. Args: dir_base: A string containing a directory to create the subdirectory under. Returns: The full path of the new subdirectory (which is not actually created yet). Raises: RuntimeError: if repeated attempts fail to obtain a unique timestamped directory name. """ attempts = 0 while attempts < MAX_DIRECTORY_CREATION_ATTEMPTS: timestamp = int(time.time()) result_dir = os.path.join( compat.as_bytes(dir_base), compat.as_bytes(str(timestamp))) if not gfile.Exists(result_dir): # Collisions are still possible (though extremely unlikely): this # directory is not actually created yet, but it will be almost # instantly on return from this function. return result_dir time.sleep(1) attempts += 1 logging.warn('Directory {} already exists; retrying (attempt {}/{})'.format( result_dir, attempts, MAX_DIRECTORY_CREATION_ATTEMPTS)) raise RuntimeError('Failed to obtain a unique export directory name after ' '{} attempts.'.format(MAX_DIRECTORY_CREATION_ATTEMPTS))
def _determinant(self): logging.warn( "Using (possibly slow) default implementation of determinant." " Requires conversion to a dense matrix and O(N^3) operations.") if self._can_use_cholesky(): return math_ops.exp(self.log_abs_determinant()) return linalg_ops.matrix_determinant(self._matrix)
def testDoubleBasic(self): x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float64) y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float64) self._compareBoth(x, y, np.add, math_ops.add) self._compareBoth(x, y, np.subtract, math_ops.subtract) self._compareBoth(x, y, np.multiply, math_ops.multiply) self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv) self._compareBoth(x, y + 0.1, np.floor_divide, math_ops.floordiv) self._compareBoth(x, y, np.add, _ADD) self._compareBoth(x, y, np.subtract, _SUB) self._compareBoth(x, y, np.multiply, _MUL) self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV) self._compareBoth(x, y + 0.1, np.floor_divide, _FLOORDIV) self._compareBoth(x, y, np.arctan2, math_ops.atan2) x1 = np.random.randn(7, 4).astype(np.float64) x2 = np.random.randn(7, 4).astype(np.float64) # Remove tiny values--atan2 gradients are flaky near the origin. x1[np.abs(x1) < 0.5] = 0.5 * np.sign(x1[np.abs(x1) < 0.5]) x2[np.abs(x2) < 0.5] = 0.5 * np.sign(x2[np.abs(x2) < 0.5]) self._compareBoth(x1, x2, np.arctan2, math_ops.atan2) try: from scipy import special # pylint: disable=g-import-not-at-top a_pos_small = np.linspace(0.1, 2, 15).reshape(1, 3, 5).astype(np.float32) x_pos_small = np.linspace(0.1, 10, 15).reshape(1, 3, 5).astype(np.float32) self._compareBoth(a_pos_small, x_pos_small, special.gammainc, math_ops.igamma) self._compareBoth(a_pos_small, x_pos_small, special.gammaincc, math_ops.igammac) except ImportError as e: tf_logging.warn("Cannot test special functions: %s" % str(e))
def __init__(self, num_units, forget_bias=1.0, input_size=None, activation=math_ops.tanh, layer_norm=True, norm_gain=1.0, norm_shift=0.0, dropout_keep_prob=1.0, dropout_prob_seed=None): """Initializes the basic LSTM cell. Args: num_units: int, The number of units in the LSTM cell. forget_bias: float, The bias added to forget gates (see above). input_size: Deprecated and unused. activation: Activation function of the inner states. layer_norm: If `True`, layer normalization will be applied. norm_gain: float, The layer normalization gain initial value. If `layer_norm` has been set to `False`, this argument will be ignored. norm_shift: float, The layer normalization shift initial value. If `layer_norm` has been set to `False`, this argument will be ignored. dropout_keep_prob: unit Tensor or float between 0 and 1 representing the recurrent dropout probability value. If float and 1.0, no dropout will be applied. dropout_prob_seed: (optional) integer, the randomness seed. """ if input_size is not None: logging.warn("%s: The input_size parameter is deprecated.", self) self._num_units = num_units self._activation = activation self._forget_bias = forget_bias self._keep_prob = dropout_keep_prob self._seed = dropout_prob_seed self._layer_norm = layer_norm self._g = norm_gain self._b = norm_shift
def __init__(self, num_units, input_size=None, use_peepholes=False, cell_clip=None, initializer=None, num_proj=None, proj_clip=None, num_unit_shards=1, num_proj_shards=1, forget_bias=1.0, state_is_tuple=False, activation=tanh): # if not state_is_tuple: # logging.warn( # "%s: Using a concatenated state is slower and will soon be " # "deprecated. Use state_is_tuple=True." % self) if input_size is not None: logging.warn("%s: The input_size parameter is deprecated." % self) #self._use_peepholes = use_peepholes #self._cell_clip = cell_clip #self._initializer = initializer #self._num_proj = num_proj #self._num_unit_shards = num_unit_shards #self._num_proj_shards = num_proj_shards self._num_units = num_units self._forget_bias = forget_bias self._state_is_tuple = state_is_tuple self._activation = activation
def after_create_session(self, training_session, coord): # pylint: disable=unused-argument # N.B. We have to pull the global step here to avoid it being unavailable # at checkpoint time; the graph has been frozen at that point. if training_util.get_global_step() is None and self.saver() is not None: raise ValueError( 'Saver defined but no global step. Run `get_or_create_global_step()`' ' in your model definition to allow checkpointing.') with self._graph.as_default(): logging.info('Installing graceful shutdown hook.') self._session = _clone_session(training_session, self._graph) self._workers = WorkerHeartbeatManager.from_devices( self._session, all_worker_devices(self._session)) self._heartbeat_supported = self._workers.num_workers() > 0 if self._heartbeat_supported: try: self._workers.configure( event_pb2.WorkerHeartbeatRequest( shutdown_mode=event_pb2.WAIT_FOR_COORDINATOR)) except errors.InvalidArgumentError: logging.warn( 'TPU device does not support heartbeats. Failure ' 'handling will be disabled.') self._heartbeat_supported = False else: logging.warn( 'No workers support hearbeats. Failure handling will be disabled.')
def _get_timestamped_export_dir(export_dir_base): # When we create a timestamped directory, there is a small chance that the # directory already exists because another worker is also writing exports. # In this case we just wait one second to get a new timestamp and try again. # If this fails several times in a row, then something is seriously wrong. max_directory_creation_attempts = 10 attempts = 0 while attempts < max_directory_creation_attempts: export_timestamp = int(time.time()) export_dir = os.path.join( compat.as_bytes(export_dir_base), compat.as_bytes( str(export_timestamp))) if not gfile.Exists(export_dir): # Collisions are still possible (though extremely unlikely): this # directory is not actually created yet, but it will be almost # instantly on return from this function. return export_dir time.sleep(1) attempts += 1 logging.warn( "Export directory {} already exists; retrying (attempt {}/{})".format( export_dir, attempts, max_directory_creation_attempts)) raise RuntimeError("Failed to obtain a unique export directory name after " "{} attempts.".format(max_directory_creation_attempts))
def train(self, delay_secs=None): """Fit the estimator using the training data. Train the estimator for `self._train_steps` steps, after waiting for `delay_secs` seconds. If `self._train_steps` is `None`, train forever. Args: delay_secs: Start training after this many seconds. Returns: The trained estimator. """ start = time.time() # Start the server, if needed. It's important to start the server before # we (optionally) sleep for the case where no device_filters are set. # Otherwise, the servers will wait to connect to each other before starting # to train. We might as well start as soon as we can. config = self._estimator.config if isinstance(config, run_config.RunConfig): if (config.cluster_spec and config.master and config.environment == run_config.Environment.LOCAL): logging.warn("ClusterSpec and master are provided, but environment is " "set to 'local'. Set environment to 'cloud' if you intend " "to use the distributed runtime.") if (config.environment != run_config.Environment.LOCAL and config.environment != run_config.Environment.GOOGLE and config.cluster_spec and config.master): self._start_server() elif config.cluster_spec and config.master: raise ValueError( "For distributed runtime, Experiment class only works with " "tf.contrib.learn.RunConfig for now, but provided {}".format( type(config))) extra_hooks = [] if delay_secs is None: task_id = self._estimator.config.task_id or 0 if self._delay_workers_by_global_step: # Wait 5500 global steps for the second worker. Each worker waits more # then previous one but with a diminishing number of steps. extra_hooks.append( basic_session_run_hooks.GlobalStepWaiterHook( int(8000.0 * math.log(task_id + 1)))) delay_secs = 0 else: # Wait 5 secs more for each new worker up to 60 secs. delay_secs = min(60, task_id * 5) if delay_secs > 0: elapsed_secs = time.time() - start remaining = delay_secs - elapsed_secs logging.info("Waiting %d secs before starting training.", remaining) time.sleep(delay_secs) return self._call_train( input_fn=self._train_input_fn, max_steps=self._train_steps, hooks=self._train_monitors + extra_hooks, saving_listeners=self._saving_listeners)
def _garbage_collect_exports(self, export_dir_base): """Deletes older exports, retaining only a given number of the most recent. Export subdirectories are assumed to be named with monotonically increasing integers; the most recent are taken to be those with the largest values. Args: export_dir_base: the base directory under which each export is in a versioned subdirectory. """ if self._exports_to_keep is None: return def _export_version_parser(path): # create a simple parser that pulls the export_version from the directory. filename = os.path.basename(path.path) if not (len(filename) == 10 and filename.isdigit()): return None return path._replace(export_version=int(filename)) # pylint: disable=protected-access keep_filter = gc._largest_export_versions(self._exports_to_keep) delete_filter = gc._negation(keep_filter) for p in delete_filter( gc._get_paths(export_dir_base, parser=_export_version_parser)): try: gfile.DeleteRecursively(p.path) except errors_impl.NotFoundError as e: tf_logging.warn('Can not delete %s recursively: %s', p.path, e)
def additional_score_function_losses(sample_losses, name=None): with ops.op_scope(sample_losses, name, "SampleLosses"): fixed_losses = [] if not isinstance(sample_losses, (list, tuple)): raise TypeError("sample_losses must be a list or tuple") for loss in sample_losses: if not isinstance(loss, ops.Tensor): raise TypeError("loss is not a Tensor: %s" % loss) ndims = loss.get_shape().ndims if not (ndims is not None and ndims <= 1): raise ValueError( "loss must be a scalar or batch-length vector loss: %s" % loss) fixed_losses.append(array_ops.stop_gradient(loss)) stoch_dependencies_map = _stochastic_dependencies_map(fixed_losses) if not stoch_dependencies_map: logging.warn( "No collection of Stochastic Tensors found for current graph.") return [] score_function_losses = [] # Iterate through all of the stochastic dependencies, adding # surrogate terms where necessary. for (stoch_node, dependent_losses) in stoch_dependencies_map.items(): score_function = stoch_node.score_function(list(dependent_losses)) if score_function is not None: with ops.name_scope("ScoreFunction_%s" % stoch_node.name): score_function_losses.append(array_ops.identity(score_function)) return score_function_losses
def _testZeroDensity(self, alpha): """Zero isn't in the support of the gamma distribution. But quantized floating point math has its limits. TODO(bjp): Implement log-gamma sampler for small-shape distributions. Args: alpha: float shape value to test """ try: from scipy import stats # pylint: disable=g-import-not-at-top except ImportError as e: tf_logging.warn("Cannot test zero density proportions: %s" % e) return allowable_zeros = { dtypes.float16: stats.gamma(alpha).cdf(np.finfo(np.float16).tiny), dtypes.float32: stats.gamma(alpha).cdf(np.finfo(np.float32).tiny), dtypes.float64: stats.gamma(alpha).cdf(np.finfo(np.float64).tiny) } failures = [] for use_gpu in [False, True]: for dt in dtypes.float16, dtypes.float32, dtypes.float64: sampler = self._Sampler( 10000, alpha, 1.0, dt, use_gpu=use_gpu, seed=12345) x = sampler() allowable = allowable_zeros[dt] * x.size allowable = allowable * 2 if allowable < 10 else allowable * 1.05 if np.sum(x <= 0) > allowable: failures += [(use_gpu, dt)] self.assertEqual([], failures)
def __init__(self, num_units, input_size=None, use_peepholes=False, cell_clip=None, initializer=None, num_proj=None, proj_clip=None, num_unit_shards=1, num_proj_shards=1, forget_bias=1.0, state_is_tuple=True, activation=tanh): """Initialize the parameters for an LSTM cell. Args: num_units: int, The number of units in the LSTM cell input_size: Deprecated and unused. use_peepholes: bool, set True to enable diagonal/peephole connections. cell_clip: (optional) A float value, if provided the cell state is clipped by this value prior to the cell output activation. initializer: (optional) The initializer to use for the weight and projection matrices. num_proj: (optional) int, The output dimensionality for the projection matrices. If None, no projection is performed. proj_clip: (optional) A float value. If `num_proj > 0` and `proj_clip` is provided, then the projected values are clipped elementwise to within `[-proj_clip, proj_clip]`. num_unit_shards: How to split the weight matrix. If >1, the weight matrix is stored across num_unit_shards. num_proj_shards: How to split the projection matrix. If >1, the projection matrix is stored across num_proj_shards. forget_bias: Biases of the forget gate are initialized by default to 1 in order to reduce the scale of forgetting at the beginning of the training. state_is_tuple: If True, accepted and returned states are 2-tuples of the `c_state` and `m_state`. If False, they are concatenated along the column axis. This latter behavior will soon be deprecated. activation: Activation function of the inner states. """ if not state_is_tuple: logging.warn("%s: Using a concatenated state is slower and will soon be " "deprecated. Use state_is_tuple=True.", self) if input_size is not None: logging.warn("%s: The input_size parameter is deprecated.", self) self._num_units = num_units self._use_peepholes = use_peepholes self._cell_clip = cell_clip self._initializer = initializer self._num_proj = num_proj self._proj_clip = proj_clip self._num_unit_shards = num_unit_shards self._num_proj_shards = num_proj_shards self._forget_bias = forget_bias self._state_is_tuple = state_is_tuple self._activation = activation if num_proj: self._state_size = ( LSTMStateTuple(num_units, num_proj) if state_is_tuple else num_units + num_proj) self._output_size = num_proj else: self._state_size = ( LSTMStateTuple(num_units, num_units) if state_is_tuple else 2 * num_units) self._output_size = num_units
def _changing_default_center_bias(): logging.warn( "Change warning: default value of `enable_centered_bias` will change" " after 2016-10-09. It will be disabled by default." "Instructions for keeping existing behaviour:\n" "Explicitly set `enable_centered_bias` to 'True' if you want to keep " "existing behaviour.")
def __init__(self, num_units, forget_bias=1.0, input_size=None, state_is_tuple=True, activation=tanh, reuse=None): """Initialize the basic LSTM cell. Args: num_units: int, The number of units in the LSTM cell. forget_bias: float, The bias added to forget gates (see above). input_size: Deprecated and unused. state_is_tuple: If True, accepted and returned states are 2-tuples of the `c_state` and `m_state`. If False, they are concatenated along the column axis. The latter behavior will soon be deprecated. activation: Activation function of the inner states. reuse: (optional) Python boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. """ if not state_is_tuple: logging.warn("%s: Using a concatenated state is slower and will soon be " "deprecated. Use state_is_tuple=True.", self) if input_size is not None: logging.warn("%s: The input_size parameter is deprecated.", self) self._num_units = num_units self._forget_bias = forget_bias self._state_is_tuple = state_is_tuple self._activation = activation self._reuse = reuse
def __init__(self, num_units, recurrence_depth=1, transfer_bias=-2.0, input_size=None, state_is_tuple=False, activation=tanh): """Initialize the basic RHN cell. Args: num_units: int, The number of units per recurrence depth in the RHN cell. forget_bias: float, The bias added to forget gates (see above). recurrence_depth: int, Number of recurrent layers in the RHN network input_size: Deprecated and unused. state_is_tuple: If True, accepted and returned states are 1-tuple of the `m_state`. By default (False). This default behavior will soon be deprecated. activation: Activation function of the inner states. """ if not state_is_tuple: logging.warn( "%s: Using a concatenated state is slower and will soon be " "deprecated. Use state_is_tuple=True." % self) if input_size is not None: logging.warn("%s: The input_size parameter is deprecated." % self) self._num_units = num_units self._recurrence_depth = recurrence_depth self._transfer_bias = transfer_bias self._state_is_tuple = state_is_tuple self._activation = activation
def __init__(self, cell, num_proj, activation=None, input_size=None, reuse=None): """Create a cell with input projection. Args: cell: an RNNCell, a projection of inputs is added before it. num_proj: Python integer. The dimension to project to. activation: (optional) an optional activation function. input_size: Deprecated and unused. reuse: (optional) Python boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. Raises: TypeError: if cell is not an RNNCell. """ super(InputProjectionWrapper, self).__init__(_reuse=reuse) if input_size is not None: logging.warn("%s: The input_size parameter is deprecated.", self) if not _like_rnncell(cell): raise TypeError("The parameter cell is not RNNCell.") self._cell = cell self._num_proj = num_proj self._activation = activation self._linear = None
def _testCompareToExplicitDerivative(self, dtype): """Compare to the explicit reparameterization derivative. Verifies that the computed derivative satisfies dsample / dalpha = d igammainv(alpha, u) / dalpha, where u = igamma(alpha, sample). Args: dtype: TensorFlow dtype to perform the computations in. """ delta = 1e-3 np_dtype = dtype.as_numpy_dtype try: from scipy import misc # pylint: disable=g-import-not-at-top from scipy import special # pylint: disable=g-import-not-at-top alpha_val = np.logspace(-2, 3, dtype=np_dtype) alpha = constant_op.constant(alpha_val) sample = random_ops.random_gamma([], alpha, np_dtype(1.0), dtype=dtype) actual = gradients_impl.gradients(sample, alpha)[0] (sample_val, actual_val) = self.evaluate((sample, actual)) u = special.gammainc(alpha_val, sample_val) expected_val = misc.derivative( lambda alpha_prime: special.gammaincinv(alpha_prime, u), alpha_val, dx=delta * alpha_val) self.assertAllClose(actual_val, expected_val, rtol=1e-3, atol=1e-3) except ImportError as e: tf_logging.warn("Cannot use special functions in a test: %s" % str(e))
def raise_errors(self, timeout_sec=0): """Wait for up to `timeout` seconds for all error sources to finish. Preferentially raise "interesting" errors (errors not in the _UNINTERESTING_ERRORS) set. Args: timeout_sec: Seconds to wait for other error sources. """ for _ in range(timeout_sec): if len(self._errors) == self._num_sources: break time.sleep(1) kept_errors = [(k, v) for (k, v) in self._errors.items() if v is not None] # First check for any interesting errors, then fall back on the session # cancelled errors etc. for k, (typ, value, traceback) in kept_errors: if isinstance(value, _UNINTERESTING_ERRORS): continue else: logging.warn('Reraising captured error') six.reraise(typ, value, traceback) for k, (typ, value, traceback) in kept_errors: logging.warn('Reraising captured error') six.reraise(typ, value, traceback)
def _check_trt_version_compatibility(): """Check compatibility of TensorRT version. Raises: RuntimeError: if the TensorRT library version is incompatible. """ compiled_version = get_linked_tensorrt_version() loaded_version = get_loaded_tensorrt_version() tf_logging.info("Linked TensorRT version: %s" % str(compiled_version)) tf_logging.info("Loaded TensorRT version: %s" % str(loaded_version)) version_mismatch = False if loaded_version[0] < compiled_version[0]: tf_logging.error( "TensorRT version mismatch. Tensorflow was compiled against " + "TensorRT %s but library loaded from environment is TensorRT %s" % (".".join([str(x) for x in compiled_version]), ".".join([str(x) for x in loaded_version])) + ". Please make sure that correct version of TensorRT " + "is available in the system and added to ldconfig or LD_LIBRARY_PATH") raise RuntimeError("Incompatible TensorRT library version") for i in zip(loaded_version, compiled_version): if i[0] != i[1]: tf_logging.warn("TensorRT mismatch. Compiled against version " + "%s, but loaded %s. Things may not work" % (".".join([str(x) for x in compiled_version]), ".".join([str(x) for x in loaded_version]))) version_mismatch = True break if not version_mismatch: tf_logging.info("Running against TensorRT version %s" % ".".join([str(x) for x in loaded_version]))
def __init__(self, num_units, forget_bias=1.0, state_is_tuple=True, activation=None, reuse=None): """Initialize the basic LSTM cell. Args: num_units: int, The number of units in the LSTM cell. forget_bias: float, The bias added to forget gates (see above). Must set to `0.0` manually when restoring from CudnnLSTM-trained checkpoints. state_is_tuple: If True, accepted and returned states are 2-tuples of the `c_state` and `m_state`. If False, they are concatenated along the column axis. The latter behavior will soon be deprecated. activation: Activation function of the inner states. Default: `tanh`. reuse: (optional) Python boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. """ super(BasicLSTMCell, self).__init__(_reuse=reuse) if not state_is_tuple: logging.warn("%s: Using a concatenated state is slower and will soon be " "deprecated. Use state_is_tuple=True.", self) self._num_units = num_units self._forget_bias = forget_bias self._state_is_tuple = state_is_tuple self._activation = activation or math_ops.tanh
def __init__(self, num_units, input_size=None, activation=tanh, dt_tau=0.1, stddev=0.0, O=None): if input_size is not None: logging.warn("%s: The input_size parameter is deprecated." % self) self._num_units = num_units self._activation = activation self._dt_tau = dt_tau self._sigma = stddev*math.sqrt(2/dt_tau-1) self._O=O
def _export_estimator(estimator, export_dir, signature_fn, input_fn, default_batch_size, exports_to_keep): input_fn = input_fn or _default_input_fn checkpoint_path = tf_saver.latest_checkpoint(estimator._model_dir) with ops.Graph().as_default() as g: contrib_variables.create_global_step(g) examples = array_ops.placeholder(dtype=dtypes.string, shape=[default_batch_size], name='input_example_tensor') features = input_fn(estimator, examples) predictions = estimator._get_predict_ops(features) # Explicit signature_fn takes priority if signature_fn: default_signature, named_graph_signatures = signature_fn(examples, features, predictions) else: try: # Some estimators provide a target_column of known type target_column = estimator._get_target_column() problem_type = target_column.problem_type if problem_type == layers.ProblemType.CLASSIFICATION: signature_fn = classification_signature_fn elif problem_type == layers.ProblemType.LINEAR_REGRESSION: signature_fn = regression_signature_fn elif problem_type == layers.ProblemType.LOGISTIC_REGRESSION: signature_fn = logistic_regression_signature_fn else: raise ValueError( 'signature_fn must be provided because the TargetColumn is a %s, ' 'which does not have a standard problem type and so cannot use a ' 'standard export signature.' % type(target_column).__name__) default_signature, named_graph_signatures = ( signature_fn(examples, features, predictions)) except AttributeError: logging.warn( 'Change warning: `signature_fn` will be required after' '2016-08-01.\n' 'Using generic signatures for now. To maintain this behavior, ' 'pass:\n' ' signature_fn=export.generic_signature_fn\n' 'Also consider passing a regression or classification signature; ' 'see cl/126430915 for an example.') default_signature, named_graph_signatures = generic_signature_fn( examples, features, predictions) if exports_to_keep is not None: exports_to_keep = gc.largest_export_versions(exports_to_keep) _export_graph(g, _get_saver(), checkpoint_path, export_dir, default_graph_signature=default_signature, named_graph_signatures=named_graph_signatures, exports_to_keep=exports_to_keep)
def _log_abs_determinant(self): logging.warn( "Using (possibly slow) default implementation of determinant." " Requires conversion to a dense matrix and O(N^3) operations.") if self._can_use_cholesky(): diag = array_ops.matrix_diag_part(self._get_cached_chol()) return 2 * math_ops.reduce_sum(math_ops.log(diag), reduction_indices=[-1]) abs_det = math_ops.abs(self.determinant()) return math_ops.log(abs_det)
def __init__(self, cell, attn_inputs, attn_size, attn_vec_size, output_size=None, input_size=None, state_is_tuple=True, attn_masks=None, merge_output_attn='linear', reuse=None): """Create a cell with attention. Args: cell: an RNNCell, an attention is added to it. attn_inputs: a Tensor. attn_size: integer, the size of an attention vector. Equal to cell.output_size by default. attn_vec_size: integer, the number of convolutional features calculated on attention state and a size of the hidden layer built from base cell state. Equal to attn_size by default. input_size: integer, the size of a hidden linear layer, built from inputs and attention. Derived from the input tensor by default. state_is_tuple: If True, accepted and returned states are n-tuples, where `n = len(cells)`. By default (False), the states are all concatenated along the column axis. attn_mask: mask that should be applied to attention. If None, no masks will be applied. reuse: (optional) Python boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. Raises: TypeError: if cell is not an RNNCell. ValueError: if cell returns a state tuple but the flag `state_is_tuple` is `False` or if attn_length is zero or less. """ if not isinstance(cell, rnn.RNNCell): raise TypeError('The parameter cell is not RNNCell.') if nest.is_sequence(cell.state_size) and not state_is_tuple: raise ValueError('Cell returns tuple of states, but the flag ' 'state_is_tuple is not set. State size is: %s' % str(cell.state_size)) if not state_is_tuple: logging.warn( '%s: Using a concatenated state is slower and will soon be ' 'deprecated. Use state_is_tuple=True.', self) self._state_is_tuple = state_is_tuple if not state_is_tuple: raise NotImplementedError self._cell = cell self._input_size = input_size self._output_size = output_size if output_size is None: self._output_size = cell.output_size self._attn_size = attn_size self._reuse = reuse self._attn_inputs = attn_inputs self._attn_vec_size = attn_vec_size self.attn_masks = attn_masks self.merge_output_attn = merge_output_attn
def _testMoments(self, dt): try: from scipy import stats # pylint: disable=g-import-not-at-top except ImportError as e: tf_logging.warn("Cannot test moments: %s" % e) return # Check the given array of samples matches the given theoretical moment # function at different orders. The test is considered passing if the # z-tests of all statistical moments are all below z_limit. # Parameters: # max_moments: the largest moments of the distribution to be tested # stride: the distance between samples to check for statistical properties # 0 means the n-th moment of each sample # any other strides tests for spatial correlation between samples; # z_limit: the maximum z-test we would consider the test to pass; # The moments test is a z-value test. This is the largest z-value # we want to tolerate. Since the z-test approximates a unit normal # distribution, it should almost definitely never exceed 6. z_limit = 6.0 for stride in 0, 1, 4, 17: alphas = [0.2, 1.0, 3.0] if dt == dtypes.float64: alphas = [0.01] + alphas for alpha in alphas: for scale in 9, 17: # Gamma moments only defined for values less than the scale param. max_moment = min(6, scale // 2) sampler = self._Sampler(20000, alpha, 1 / scale, dt, use_gpu=False, seed=12345) moments = [0] * (max_moment + 1) moments_sample_count = [0] * (max_moment + 1) x = np.array(sampler().flat) # sampler does 10x samples for k in range(len(x)): moment = 1. for i in range(max_moment + 1): index = k + i * stride if index >= len(x): break moments[i] += moment moments_sample_count[i] += 1 moment *= x[index] for i in range(max_moment + 1): moments[i] /= moments_sample_count[i] for i in range(1, max_moment + 1): g = stats.gamma(alpha, scale=scale) if stride == 0: moments_i_mean = g.moment(i) moments_i_squared = g.moment(2 * i) else: moments_i_mean = pow(g.moment(1), i) moments_i_squared = pow(g.moment(2), i) # Calculate moment variance safely: # This is just # (moments_i_squared - moments_i_mean**2) / moments_sample_count[i] normalized_moments_i_var = ( moments_i_mean / moments_sample_count[i] * (moments_i_squared / moments_i_mean - moments_i_mean)) # Assume every operation has a small numerical error. # It takes i multiplications to calculate one i-th moment. error_per_moment = i * np.finfo(dt.as_numpy_dtype).eps total_variance = (normalized_moments_i_var + error_per_moment) tiny = np.finfo(dt.as_numpy_dtype).tiny self.assertGreaterEqual(total_variance, 0) if total_variance < tiny: total_variance = tiny # z_test is approximately a unit normal distribution. z_test = abs((moments[i] - moments_i_mean) / math.sqrt(total_variance)) self.assertLess(z_test, z_limit)
def __init__(self, num_units, use_peepholes=False, cell_clip=None, initializer=None, num_proj=None, proj_clip=None, num_unit_shards=None, num_proj_shards=None, forget_bias=1.0, state_is_tuple=True, activation=None, reuse=None): """Initialize the parameters for an LSTM cell. Args: num_units: int, The number of units in the LSTM cell use_peepholes: bool, set True to enable diagonal/peephole connections. cell_clip: (optional) A float value, if provided the cell state is clipped by this value prior to the cell output activation. initializer: (optional) The initializer to use for the weight and projection matrices. num_proj: (optional) int, The output dimensionality for the projection matrices. If None, no projection is performed. proj_clip: (optional) A float value. If `num_proj > 0` and `proj_clip` is provided, then the projected values are clipped elementwise to within `[-proj_clip, proj_clip]`. num_unit_shards: Deprecated, will be removed by Jan. 2017. Use a variable_scope partitioner instead. num_proj_shards: Deprecated, will be removed by Jan. 2017. Use a variable_scope partitioner instead. forget_bias: Biases of the forget gate are initialized by default to 1 in order to reduce the scale of forgetting at the beginning of the training. Must set it manually to `0.0` when restoring from CudnnLSTM trained checkpoints. state_is_tuple: If True, accepted and returned states are 2-tuples of the `c_state` and `m_state`. If False, they are concatenated along the column axis. This latter behavior will soon be deprecated. activation: Activation function of the inner states. Default: `tanh`. reuse: (optional) Python boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. When restoring from CudnnLSTM-trained checkpoints, must use CudnnCompatibleLSTMCell instead. """ super(LSTMCell, self).__init__(_reuse=reuse) if not state_is_tuple: logging.warn( "%s: Using a concatenated state is slower and will soon be " "deprecated. Use state_is_tuple=True.", self) if num_unit_shards is not None or num_proj_shards is not None: logging.warn( "%s: The num_unit_shards and proj_unit_shards parameters are " "deprecated and will be removed in Jan 2017. " "Use a variable scope with a partitioner instead.", self) self._num_units = num_units self._use_peepholes = use_peepholes self._cell_clip = cell_clip self._initializer = initializer self._num_proj = num_proj self._proj_clip = proj_clip self._num_unit_shards = num_unit_shards self._num_proj_shards = num_proj_shards self._forget_bias = forget_bias self._state_is_tuple = state_is_tuple self._activation = activation or math_ops.tanh if num_proj: self._state_size = (LSTMStateTuple(num_units, num_proj) if state_is_tuple else num_units + num_proj) self._output_size = num_proj else: self._state_size = (LSTMStateTuple(num_units, num_units) if state_is_tuple else 2 * num_units) self._output_size = num_units
def train(self, delay_secs=None): """Fit the estimator using the training data. Train the estimator for `self._train_steps` steps, after waiting for `delay_secs` seconds. If `self._train_steps` is `None`, train forever. Args: delay_secs: Start training after this many seconds. Returns: The trained estimator. """ start = time.time() # Start the server, if needed. It's important to start the server before # we (optionally) sleep for the case where no device_filters are set. # Otherwise, the servers will wait to connect to each other before starting # to train. We might as well start as soon as we can. config = self._estimator.config if isinstance(config, run_config.RunConfig): if (config.cluster_spec and config.master and config.environment == run_config.Environment.LOCAL): logging.warn( "ClusterSpec and master are provided, but environment is " "set to 'local'. Set environment to 'cloud' if you intend " "to use the distributed runtime.") if (config.environment != run_config.Environment.LOCAL and config.environment != run_config.Environment.GOOGLE and config.cluster_spec and config.master): self._start_server() elif config.cluster_spec and config.master: raise ValueError( "For distributed runtime, Experiment class only works with" "tf.contrib.learn.RunConfig for now, but provided {}".format( type(config))) extra_hooks = [] if delay_secs is None: task_id = self._estimator.config.task_id or 0 if self._delay_workers_by_global_step: # Wait 5500 global steps for the second worker. Each worker waits more # then previous one but with a diminishing number of steps. extra_hooks.append( basic_session_run_hooks.GlobalStepWaiterHook( int(8000.0 * math.log(task_id + 1)))) delay_secs = 0 else: # Wait 5 secs more for each new worker up to 60 secs. delay_secs = min(60, task_id * 5) if delay_secs > 0: elapsed_secs = time.time() - start remaining = delay_secs - elapsed_secs logging.info("Waiting %d secs before starting training.", remaining) time.sleep(delay_secs) return self._call_train(input_fn=self._train_input_fn, max_steps=self._train_steps, hooks=self._train_monitors + extra_hooks, saving_listeners=self._saving_listeners)
def _export_estimator(estimator, export_dir, signature_fn, input_fn, default_batch_size, exports_to_keep, input_feature_key=None, use_deprecated_input_fn=True, prediction_key=None, checkpoint_path=None): if use_deprecated_input_fn: input_fn = input_fn or _default_input_fn elif input_fn is None: raise ValueError('input_fn must be defined.') # If checkpoint_path is specified, use the specified checkpoint path. checkpoint_path = (checkpoint_path or tf_saver.latest_checkpoint(estimator._model_dir)) with ops.Graph().as_default() as g: training_util.create_global_step(g) if use_deprecated_input_fn: examples = array_ops.placeholder(dtype=dtypes.string, shape=[default_batch_size], name='input_example_tensor') features = input_fn(estimator, examples) else: features, _ = input_fn() examples = None if input_feature_key is not None: examples = features.pop(input_feature_key) if (not features) and (examples is None): raise ValueError('Either features or examples must be defined.') predictions = estimator._get_predict_ops(features).predictions if prediction_key is not None: predictions = predictions[prediction_key] # Explicit signature_fn takes priority if signature_fn: default_signature, named_graph_signatures = signature_fn( examples, features, predictions) else: try: # Some estimators provide a signature function. # TODO(zakaria): check if the estimator has this function, # raise helpful error if not signature_fn = estimator._create_signature_fn() default_signature, named_graph_signatures = (signature_fn( examples, features, predictions)) except AttributeError: logging.warn( 'Change warning: `signature_fn` will be required after' '2016-08-01.\n' 'Using generic signatures for now. To maintain this behavior, ' 'pass:\n' ' signature_fn=export.generic_signature_fn\n' 'Also consider passing a regression or classification signature; ' 'see cl/126430915 for an example.') default_signature, named_graph_signatures = generic_signature_fn( examples, features, predictions) if exports_to_keep is not None: exports_to_keep = gc.largest_export_versions(exports_to_keep) return _export_graph(g, _get_saver(), checkpoint_path, export_dir, default_graph_signature=default_signature, named_graph_signatures=named_graph_signatures, exports_to_keep=exports_to_keep)
def __init__(self, num_units, input_size=None, use_peepholes=False, cell_clip=None, initializer=None, num_proj=None, proj_clip=None, num_unit_shards=None, num_proj_shards=None, forget_bias=1.0, state_is_tuple=True, activation=tanh): """Initialize the parameters for an LSTM cell. Args: num_units: int, The number of units in the LSTM cell input_size: Deprecated and unused. use_peepholes: bool, set True to enable diagonal/peephole connections. cell_clip: (optional) A float value, if provided the cell state is clipped by this value prior to the cell output activation. initializer: (optional) The initializer to use for the weight and projection matrices. num_proj: (optional) int, The output dimensionality for the projection matrices. If None, no projection is performed. proj_clip: (optional) A float value. If `num_proj > 0` and `proj_clip` is provided, then the projected values are clipped elementwise to within `[-proj_clip, proj_clip]`. num_unit_shards: Deprecated, will be removed by Jan. 2017. Use a variable_scope partitioner instead. num_proj_shards: Deprecated, will be removed by Jan. 2017. Use a variable_scope partitioner instead. forget_bias: Biases of the forget gate are initialized by default to 1 in order to reduce the scale of forgetting at the beginning of the training. state_is_tuple: If True, accepted and returned states are 2-tuples of the `c_state` and `m_state`. If False, they are concatenated along the column axis. This latter behavior will soon be deprecated. activation: Activation function of the inner states. """ if not state_is_tuple: logging.warn("%s: Using a concatenated state is slower and will soon be " "deprecated. Use state_is_tuple=True.", self) if input_size is not None: logging.warn("%s: The input_size parameter is deprecated.", self) if num_unit_shards is not None or num_proj_shards is not None: logging.warn( "%s: The num_unit_shards and proj_unit_shards parameters are " "deprecated and will be removed in Jan 2017. " "Use a variable scope with a partitioner instead.", self) self._num_units = num_units self._use_peepholes = use_peepholes self._cell_clip = cell_clip self._initializer = initializer self._num_proj = num_proj self._proj_clip = proj_clip self._num_unit_shards = num_unit_shards self._num_proj_shards = num_proj_shards self._forget_bias = forget_bias self._state_is_tuple = state_is_tuple self._activation = activation if num_proj: self._state_size = ( LSTMStateTuple(num_units, num_proj) if state_is_tuple else num_units + num_proj) self._output_size = num_proj else: self._state_size = ( LSTMStateTuple(num_units, num_units) if state_is_tuple else 2 * num_units) self._output_size = num_units
def _pfor_impl(loop_fn, iters, fallback_to_while_loop, parallel_iterations=None, pfor_config=None): """Implementation of pfor.""" assert not context.executing_eagerly() loop_fn_has_config = _loop_fn_has_config(loop_fn) existing_ops = set(ops.get_default_graph().get_operations()) # Run the loop body with ops.name_scope("loop_body"): loop_var = array_ops.placeholder_with_default(0, shape=[]) if loop_fn_has_config: if pfor_config is None: pfor_config = PForConfig() pfor_config._set_iters(iters) # pylint: disable=protected-access loop_fn_outputs = loop_fn(loop_var, **{PFOR_CONFIG_ARG: pfor_config}) else: assert pfor_config is None loop_fn_outputs = loop_fn(loop_var) # Convert outputs to Tensor if needed. rewrap_as_ndarray = False tmp_loop_fn_outputs = [] for loop_fn_output in nest.flatten(loop_fn_outputs): if (loop_fn_output is not None and not isinstance( loop_fn_output, (ops.Operation, ops.Tensor, sparse_tensor.SparseTensor))): if isinstance(loop_fn_output, indexed_slices.IndexedSlices): logging.warn("Converting %s to a dense representation may make it slow." " Alternatively, output the indices and values of the" " IndexedSlices separately, and handle the vectorized" " outputs directly." % loop_fn_output) loop_fn_output = ops.convert_to_tensor(loop_fn_output) elif isinstance(loop_fn_output, np_arrays.ndarray): loop_fn_output = loop_fn_output.data rewrap_as_ndarray = True else: loop_fn_output = ops.convert_to_tensor(loop_fn_output) tmp_loop_fn_outputs.append(loop_fn_output) loop_fn_outputs = nest.pack_sequence_as(loop_fn_outputs, tmp_loop_fn_outputs) new_ops = set(ops.get_default_graph().get_operations()) - existing_ops iters = ops.convert_to_tensor(iters) if parallel_iterations is not None: if parallel_iterations < 1: raise ValueError("parallel_iterations must be None or a positive integer") if parallel_iterations == 1: raise ValueError("Found parallel_iterations == 1. Use for_loop instead.") iters_value = tensor_util.constant_value(iters) if iters_value is not None and iters_value < parallel_iterations: parallel_iterations = None if parallel_iterations is None: with ops.name_scope("pfor"): converter = PFor(loop_var, iters, new_ops, fallback_to_while_loop=fallback_to_while_loop, pfor_config=pfor_config) outputs = [] for loop_fn_output in nest.flatten(loop_fn_outputs): output = converter.convert(loop_fn_output) if rewrap_as_ndarray: output = np_arrays.tensor_to_ndarray(output) outputs.append(output) return nest.pack_sequence_as(loop_fn_outputs, outputs) else: if pfor_config is not None and pfor_config._has_reductions(): # pylint: disable=protected-access raise ValueError("Setting parallel_iterations currently unsupported if" " reductions across iterations are performed.") num_tiled_iterations = iters // parallel_iterations num_remaining_iterations = iters % parallel_iterations # TODO(agarwal): Avoid calling loop_fn twice. Generate the loop body inside # a tf.function and extract the graph from there to vectorize it. with ops.name_scope("pfor_untiled"): converter = PFor(loop_var, num_remaining_iterations, new_ops, fallback_to_while_loop=fallback_to_while_loop, pfor_config=pfor_config) remaining_outputs = [] flattened_loop_fn_outputs = nest.flatten(loop_fn_outputs) for loop_fn_output in flattened_loop_fn_outputs: output = converter.convert(loop_fn_output) if rewrap_as_ndarray: output = np_arrays.tensor_to_ndarray(output) remaining_outputs.append(output) with ops.name_scope("pfor_tiled"): loop_fn_dtypes = [ops.convert_to_tensor(x).dtype for x in flattened_loop_fn_outputs] def tiled_loop_body(j): offset = j * parallel_iterations + num_remaining_iterations def tiled_loop_fn(i, pfor_config=None): if loop_fn_has_config: return nest.flatten(loop_fn(i + offset, pfor_config=pfor_config)) else: return nest.flatten(loop_fn(i + offset)) return _pfor_impl( tiled_loop_fn, parallel_iterations, fallback_to_while_loop=fallback_to_while_loop, pfor_config=pfor_config) tiled_outputs = for_loop(tiled_loop_body, loop_fn_dtypes, num_tiled_iterations, parallel_iterations=1) tiled_outputs = [_flatten_first_two_dims(y) for y in tiled_outputs] with ops.name_scope("pfor"): iters_value = tensor_util.constant_value(iters) if iters_value is None or iters_value % parallel_iterations: outputs = control_flow_ops.cond( math_ops.equal(num_remaining_iterations, 0), lambda: tiled_outputs, lambda: [array_ops.concat([x, y], axis=0) for x, y in zip(remaining_outputs, tiled_outputs)]) else: outputs = tiled_outputs flattened_outputs = nest.flatten(outputs) if rewrap_as_ndarray: flattened_outputs = [ np_arrays.tensor_to_ndarray(x) for x in flattened_outputs] return nest.pack_sequence_as(loop_fn_outputs, nest.flatten(outputs))
def _einsum_v1(equation, *inputs, **kwargs): """Legacy implementation of einsum without using EinsumOp.""" name = kwargs.pop('name', None) if kwargs: raise TypeError( 'invalid keyword arguments for this function: ' + ', '.join([format(key) for key in sorted(list(kwargs.keys()))])) with ops.name_scope(name, 'einsum', [equation, inputs]) as name: inputs = list(inputs) input_shapes = [x.shape for x in inputs] input_axis_labels, output_axis_labels = ( _einsum_v1_parse_and_resolve_equation(equation, input_shapes)) axis_labels = set(''.join(input_axis_labels) + output_axis_labels) for a in axis_labels: for input_labels in input_axis_labels: if (len(input_axis_labels) == 1 and input_labels.count(a) == 2 and input_labels == input_labels[::-1] and '->' not in equation): return math_ops.trace(inputs[0]) if input_labels.count(a) > 1: raise ValueError( 'Subscript not supported: an axis appears more than once: %s' % input_labels) for a in axis_labels: input_count = sum(1 for s in input_axis_labels if a in s) if input_count > 2 and a not in output_axis_labels: logging.warn( 'Falling back to exponential-space implementation of einsum()' ' because index "%s" is summed over more than two inputs.', a) return _exponential_space_einsum_v1(equation, *inputs) # Use xla_einsum if executing on TPU and if the operation is a 2 input # einsum supported by XlaEinsumOp. if _enclosing_tpu_context() is not None and len(inputs) == 2: return gen_xla_ops.xla_einsum( inputs[0], inputs[1], input_axis_labels[0] + ',' + input_axis_labels[1] + '->' + output_axis_labels) temp = inputs[0] temp_axis_labels = input_axis_labels[0] for i in xrange(len(inputs) - 1): axes_to_sum = ( set(temp_axis_labels) & set(input_axis_labels[i + 1]) - set(output_axis_labels)) temp, temp_axis_labels = _einsum_v1_reduction( temp, temp_axis_labels, inputs[i + 1], input_axis_labels[i + 1], axes_to_sum) missing_indices = set(temp_axis_labels) - set(output_axis_labels) if missing_indices: axis = [ i for i, a in enumerate(temp_axis_labels) if a not in output_axis_labels ] temp = math_ops.reduce_sum(temp, axis=axis) temp_axis_labels = ''.join(a for a in temp_axis_labels if a in output_axis_labels) if sorted(temp_axis_labels) != sorted(output_axis_labels): raise ValueError('Invalid equation: %s' % equation) perm = [temp_axis_labels.index(a) for a in output_axis_labels] return _transpose_if_necessary(temp, perm)
def _pfor_impl(loop_fn, iters, fallback_to_while_loop, parallel_iterations=None, pfor_config=None): """Implementation of pfor.""" assert not context.executing_eagerly() loop_fn_has_config = _loop_fn_has_config(loop_fn) existing_ops = set(ops.get_default_graph().get_operations()) iters_value = tensor_util.constant_value(iters) # Run the loop body with ops.name_scope("loop_body"): loop_var = array_ops.placeholder_with_default(0, shape=[]) if loop_fn_has_config: if pfor_config is None: pfor_config = PForConfig() pfor_config._set_iters(iters) # pylint: disable=protected-access loop_fn_outputs = loop_fn(loop_var, **{PFOR_CONFIG_ARG: pfor_config}) else: assert pfor_config is None f = autograph.tf_convert(loop_fn, autograph_ctx.control_status_ctx()) loop_fn_outputs = f(loop_var) loop_fn_output_tensors = nest.map_structure(_composite_to_tensors, loop_fn_outputs) # Convert outputs to Tensor if needed. tmp_loop_fn_outputs = [] for loop_fn_output in nest.flatten(loop_fn_output_tensors): if (loop_fn_output is not None and not isinstance( loop_fn_output, (ops.Operation, ops.Tensor, sparse_tensor.SparseTensor))): if isinstance(loop_fn_output, indexed_slices.IndexedSlices): logging.warn( "Converting %s to a dense representation may make it slow." " Alternatively, output the indices and values of the" " IndexedSlices separately, and handle the vectorized" " outputs directly." % loop_fn_output) loop_fn_output = ops.convert_to_tensor(loop_fn_output) else: loop_fn_output = ops.convert_to_tensor(loop_fn_output) tmp_loop_fn_outputs.append(loop_fn_output) loop_fn_output_tensors = nest.pack_sequence_as(loop_fn_output_tensors, tmp_loop_fn_outputs) new_ops = set(ops.get_default_graph().get_operations()) - existing_ops iters = ops.convert_to_tensor(iters) if parallel_iterations is not None: if parallel_iterations < 1: raise ValueError( "Argument `parallel_iterations` must be None or a positive integer. " f"Received: {parallel_iterations}.") if parallel_iterations == 1: raise ValueError( "Found `parallel_iterations == 1`. Use `for_loop` instead.") if iters_value is not None and iters_value < parallel_iterations: parallel_iterations = None if parallel_iterations is None: with ops.name_scope("pfor"): converter = PFor(loop_var, iters, new_ops, fallback_to_while_loop=fallback_to_while_loop, pfor_config=pfor_config) flattened_output_tensors = [] for loop_fn_output in nest.flatten(loop_fn_output_tensors): output = converter.convert(loop_fn_output) flattened_output_tensors.append(output) else: if pfor_config is not None and pfor_config._has_reductions(): # pylint: disable=protected-access raise ValueError( "Setting `parallel_iterations` currently unsupported if " "reductions across iterations are performed.") num_tiled_iterations = iters // parallel_iterations num_remaining_iterations = iters % parallel_iterations # TODO(agarwal): Avoid calling loop_fn twice. Generate the loop body inside # a tf.function and extract the graph from there to vectorize it. with ops.name_scope("pfor_untiled"): converter = PFor(loop_var, num_remaining_iterations, new_ops, fallback_to_while_loop=fallback_to_while_loop, pfor_config=pfor_config) remaining_output_tensors = [] flattened_output_tensors = nest.flatten(loop_fn_output_tensors) for loop_fn_output in flattened_output_tensors: output = converter.convert(loop_fn_output) remaining_output_tensors.append(output) with ops.name_scope("pfor_tiled"): loop_fn_dtypes = [ ops.convert_to_tensor(x).dtype for x in flattened_output_tensors ] def tiled_loop_body(j): offset = j * parallel_iterations + num_remaining_iterations def tiled_loop_fn(i, pfor_config=None): if loop_fn_has_config: loop_fn_outputs = loop_fn(i + offset, pfor_config=pfor_config) else: loop_fn_outputs = loop_fn(i + offset) return nest.flatten( # Stacking across iterations requires explicit Tensors. nest.map_structure(_composite_to_tensors, loop_fn_outputs)) return _pfor_impl( tiled_loop_fn, parallel_iterations, fallback_to_while_loop=fallback_to_while_loop, pfor_config=pfor_config) tiled_output_tensors = for_loop(tiled_loop_body, loop_fn_dtypes, num_tiled_iterations, parallel_iterations=1) tiled_output_tensors = [ _flatten_first_two_dims(y) for y in tiled_output_tensors ] with ops.name_scope("pfor"): if iters_value is None or iters_value % parallel_iterations: output_tensors = control_flow_ops.cond( math_ops.equal(num_remaining_iterations, 0), lambda: tiled_output_tensors, lambda: [ array_ops.concat([x, y], axis=0) # pylint: disable=g-long-lambda for x, y in zip(remaining_output_tensors, tiled_output_tensors) ]) else: output_tensors = tiled_output_tensors flattened_output_tensors = nest.flatten(output_tensors) for output, original_output in zip( flattened_output_tensors, nest.flatten(loop_fn_output_tensors)): # Restore any shape information lost from tiling. # TODO(b/174254748): this may not be correct for stacked `variant`s. output.set_shape( tensor_shape.TensorShape([iters_value]).concatenate( original_output.shape)) return nest.map_structure_up_to( loop_fn_outputs, functools.partial(_composite_from_tensors, batch_size=iters_value), nest.pack_sequence_as(loop_fn_output_tensors, flattened_output_tensors), loop_fn_outputs)
def evaluate(graph, output_dir, checkpoint_path, eval_dict, update_op=None, global_step_tensor=None, supervisor_master='', log_every_steps=10, feed_fn=None, max_steps=None): """Evaluate a model loaded from a checkpoint. Given `graph`, a directory to write summaries to (`output_dir`), a checkpoint to restore variables from, and a `dict` of `Tensor`s to evaluate, run an eval loop for `max_steps` steps, or until an exception (generally, an end-of-input signal from a reader operation) is raised from running `eval_dict`. In each step of evaluation, all tensors in the `eval_dict` are evaluated, and every `log_every_steps` steps, they are logged. At the very end of evaluation, a summary is evaluated (finding the summary ops using `Supervisor`'s logic) and written to `output_dir`. Args: graph: A `Graph` to train. It is expected that this graph is not in use elsewhere. output_dir: A string containing the directory to write a summary to. checkpoint_path: A string containing the path to a checkpoint to restore. Can be `None` if the graph doesn't require loading any variables. eval_dict: A `dict` mapping string names to tensors to evaluate. It is evaluated in every logging step. The result of the final evaluation is returned. If `update_op` is None, then it's evaluated in every step. If `max_steps` is `None`, this should depend on a reader that will raise an end-of-inupt exception when the inputs are exhausted. update_op: A `Tensor` which is run in every step. global_step_tensor: A `Variable` containing the global step. If `None`, one is extracted from the graph using the same logic as in `Supervisor`. Used to place eval summaries on training curves. supervisor_master: The master string to use when preparing the session. log_every_steps: Integer. Output logs every `log_every_steps` evaluation steps. The logs contain the `eval_dict` and timing information. feed_fn: A function that is called every iteration to produce a `feed_dict` passed to `session.run` calls. Optional. max_steps: Integer. Evaluate `eval_dict` this many times. Returns: A tuple `(eval_results, global_step)`: eval_results: A `dict` mapping `string` to numeric values (`int`, `float`) that are the result of running eval_dict in the last step. `None` if no eval steps were run. global_step: The global step this evaluation corresponds to. Raises: ValueError: if `output_dir` is empty. """ if not output_dir: raise ValueError('Output directory should be non-empty %s.' % output_dir) with graph.as_default(): global_step_tensor = contrib_variables.assert_or_get_global_step( graph, global_step_tensor) # Create or get summary op, global_step and saver. saver = _get_saver() local_init_op = _get_local_init_op() ready_op = _get_ready_op() session_manager = session_manager_lib.SessionManager( local_init_op=local_init_op, ready_op=ready_op) session, initialized = session_manager.recover_session( master=supervisor_master, saver=saver, checkpoint_dir=checkpoint_path) # Start queue runners. coord = coordinator.Coordinator() threads = queue_runner.start_queue_runners(session, coord) with session: if not initialized: logging.warning('Failed to initialize from %s.', checkpoint_path) # TODO(ipolosukhin): This should be failing, but old code relies on that. session.run(variables.initialize_all_variables()) if checkpoint_path: _restore_from_checkpoint(session, graph, checkpoint_path, saver) current_global_step = session.run(global_step_tensor) eval_results = None # TODO(amodei): Fix this to run through the eval set exactly once. step = 0 eval_step = None feed_dict = None logging.info('Eval steps [%d,%s) for training step %d.', step, 'inf' if max_steps is None else str(max_steps), current_global_step) try: try: while (max_steps is None) or (step < max_steps): step += 1 start_time = time.time() feed_dict = feed_fn() if feed_fn is not None else None if update_op is not None: session.run(update_op, feed_dict=feed_dict) else: eval_results = session.run(eval_dict, feed_dict=feed_dict) eval_step = step # TODO(wicke): We should assert that the global step hasn't changed. if step % log_every_steps == 0: if eval_step is None or step != eval_step: eval_results = session.run(eval_dict, feed_dict=feed_dict) eval_step = step duration = time.time() - start_time logging.info('Results after %d steps (%.3f sec/batch): %s.', step, float(duration), _eval_results_to_str(eval_results)) finally: if eval_results is None or step != eval_step: eval_results = session.run(eval_dict, feed_dict=feed_dict) eval_step = step # Stop session first, before queue runners. session.close() # Stop queue runners. try: coord.request_stop() coord.join(threads, stop_grace_period_secs=120) except (RuntimeError, errors.CancelledError) as e: logging.warning('Coordinator didn\'t stop cleanly: %s', e) # catch OutOfRangeError which is thrown when queue is out of data (and for # other reasons as well). except errors.OutOfRangeError as e: if max_steps is None: logging.info('Input queue is exhausted.') else: logging.warn('Input queue is exhausted: %s.', e) # catch StopIteration which is thrown is DataReader is out of data. except StopIteration as e: if max_steps is None: logging.info('Input iterator is exhausted.') else: logging.warn('Input iterator is exhausted: %s.', e) # Save summaries for this evaluation. _write_summary_results(output_dir, eval_results, current_global_step) return eval_results, current_global_step
def _train_internal(graph, output_dir, train_op, loss_op, global_step_tensor, init_op, init_feed_dict, init_fn, log_every_steps, supervisor_is_chief, supervisor_master, supervisor_save_model_secs, keep_checkpoint_max, supervisor_save_summaries_steps, feed_fn, steps, fail_on_nan_loss, monitors, max_steps): """See train.""" if (steps is not None) and (max_steps is not None): raise ValueError('Can not provide both steps and max_steps.') if not output_dir: raise ValueError('Output directory should be non-empty %s.' % output_dir) if train_op is None: raise ValueError('Missing train_op.') if loss_op is None: raise ValueError('Missing loss_op.') with graph.as_default(): global_step_tensor = contrib_variables.assert_or_get_global_step( graph, global_step_tensor) if global_step_tensor is None: raise ValueError('No "global_step" was provided or found in the graph.') # Get current step. try: start_step = checkpoints.load_variable( output_dir, global_step_tensor.name) except (errors.NotFoundError, ValueError): start_step = 0 summary_writer = (get_summary_writer(output_dir) if supervisor_is_chief else None) # Add default chief monitors if none were provided. if not monitors: monitors = monitors_lib.get_default_monitors( loss_op=loss_op, summary_op=logging_ops.get_summary_op(), save_summary_steps=supervisor_save_summaries_steps, summary_writer=summary_writer) if supervisor_is_chief else [] # TODO(ipolosukhin): Replace all functionality of Supervisor # with Chief-Exclusive Monitors. if not supervisor_is_chief: # Prune list of monitor to the ones runnable on all workers. monitors = [monitor for monitor in monitors if monitor.run_on_all_workers] if max_steps is None: max_steps = (start_step + steps) if steps else None # Start monitors, can create graph parts. for monitor in monitors: monitor.begin(max_steps=max_steps) supervisor = tf_supervisor.Supervisor( graph, init_op=init_op or tf_supervisor.Supervisor.USE_DEFAULT, init_feed_dict=init_feed_dict, is_chief=supervisor_is_chief, logdir=output_dir, saver=_make_saver(graph, keep_checkpoint_max), global_step=global_step_tensor, summary_op=None, summary_writer=summary_writer, save_model_secs=supervisor_save_model_secs, init_fn=init_fn) session = supervisor.PrepareSession(master=supervisor_master, start_standard_services=True) supervisor.StartQueueRunners(session) with session: get_current_step = lambda: session.run(global_step_tensor) start_step = get_current_step() last_step = start_step last_log_step = start_step loss_value = None logging.info('Training steps [%d,%s)', last_step, 'inf' if max_steps is None else str(max_steps)) excinfo = None try: while not supervisor.ShouldStop() and ( (max_steps is None) or (last_step < max_steps)): start_time = time.time() feed_dict = feed_fn() if feed_fn is not None else None outputs, should_stop = _run_with_monitors( session, last_step + 1, [train_op, loss_op], feed_dict, monitors) loss_value = outputs[loss_op.name] if np.isnan(loss_value): failure_message = 'Model diverged with loss = NaN.' if fail_on_nan_loss: logging.error(failure_message) raise NanLossDuringTrainingError() else: logging.warning(failure_message) if should_stop: break this_step = get_current_step() if this_step <= last_step: logging.error( 'Global step was not incremented by train op at step %s' ': new step %d', last_step, this_step) last_step = this_step is_last_step = (max_steps is not None) and (last_step >= max_steps) if is_last_step or (last_step - last_log_step >= log_every_steps): logging.info( 'training step %d, loss = %.5f (%.3f sec/batch).', last_step, loss_value, float(time.time() - start_time)) last_log_step = last_step except errors.OutOfRangeError as e: logging.warn('Got exception during tf.learn training loop possibly ' 'due to exhausted input queue %s.', e) except StopIteration: logging.info('Exhausted input iterarator.') except BaseException as e: # pylint: disable=broad-except # Hold on to any other exceptions while we try recording a final # checkpoint and summary. excinfo = sys.exc_info() finally: try: # Call supervisor.Stop() from within a try block because it re-raises # exceptions thrown by the supervised threads. supervisor.Stop(close_summary_writer=False) # Save one last checkpoint and summaries # TODO(wicke): This should be handled by Supervisor # In case we encountered an exception in the try block before we updated # last_step, update it here (again). last_step = get_current_step() if supervisor_is_chief: ckpt_path = supervisor.save_path logging.info('Saving checkpoint for step %d to checkpoint: %s.', last_step, ckpt_path) supervisor.saver.save(session, ckpt_path, global_step=last_step) # Finish monitors. for monitor in monitors: monitor.end() # catch OutOfRangeError which is thrown when queue is out of data (and for # other reasons as well). except errors.OutOfRangeError as e: logging.warn('OutOfRangeError in tf.learn final checkpoint possibly ' 'due to exhausted input queue. Note: summary_op is not ' 'expected to trigger dequeues. %s.', e) except BaseException as e: # pylint: disable=broad-except # If we don't already have an exception to re-raise, raise this one. if not excinfo: raise # Otherwise, log this one and raise the other in the finally block. logging.error('Got exception during tf.learn final checkpoint %s.', e) finally: if excinfo: reraise(*excinfo) return loss_value
def embedding_lookup_sparse(params, sp_ids, sp_weights, partition_strategy="mod", name=None, combiner=None, max_norm=None): """Computes embeddings for the given ids and weights. This op assumes that there is at least one id for each row in the dense tensor represented by sp_ids (i.e. there are no rows with empty features), and that all the indices of sp_ids are in canonical row-major order. It also assumes that all id values lie in the range [0, p0), where p0 is the sum of the size of params along dimension 0. Args: params: A single tensor representing the complete embedding tensor, or a list of P tensors all of same shape except for the first dimension, representing sharded embedding tensors. Alternatively, a `PartitionedVariable`, created by partitioning along dimension 0. Each element must be appropriately sized for the given `partition_strategy`. sp_ids: N x M SparseTensor of int64 ids (typically from FeatureValueToId), where N is typically batch size and M is arbitrary. sp_weights: either a SparseTensor of float / double weights, or None to indicate all weights should be taken to be 1. If specified, sp_weights must have exactly the same shape and indices as sp_ids. partition_strategy: A string specifying the partitioning strategy, relevant if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. name: Optional name for the op. combiner: A string specifying the reduction op. Currently "mean", "sqrtn" and "sum" are supported. "sum" computes the weighted sum of the embedding results for each row. "mean" is the weighted sum divided by the total weight. "sqrtn" is the weighted sum divided by the square root of the sum of the squares of the weights. max_norm: If provided, each embedding is normalized to have l2 norm equal to max_norm before combining. Returns: A dense tensor representing the combined embeddings for the sparse ids. For each row in the dense tensor represented by sp_ids, the op looks up the embeddings for all ids in that row, multiplies them by the corresponding weight, and combines these embeddings as specified. In other words, if shape(combined params) = [p0, p1, ..., pm] and shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn] then shape(output) = [d0, d1, ..., dn-1, p1, ..., pm]. For instance, if params is a 10x20 matrix, and sp_ids / sp_weights are [0, 0]: id 1, weight 2.0 [0, 1]: id 3, weight 0.5 [1, 0]: id 0, weight 1.0 [2, 3]: id 1, weight 3.0 with `combiner`="mean", then the output will be a 3x20 matrix where output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5) output[1, :] = params[0, :] * 1.0 output[2, :] = params[1, :] * 3.0 Raises: TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither None nor SparseTensor. ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}. """ if combiner is None: logging.warn("The default value of combiner will change from \"mean\" " "to \"sqrtn\" after 2016/11/01.") combiner = "mean" if combiner not in ("mean", "sqrtn", "sum"): raise ValueError("combiner must be one of 'mean', 'sqrtn' or 'sum'") if isinstance(params, variables.PartitionedVariable): params = list(params) # Iterate to get the underlying Variables. if not isinstance(params, list): params = [params] if not isinstance(sp_ids, sparse_tensor.SparseTensor): raise TypeError("sp_ids must be SparseTensor") ignore_weights = sp_weights is None if not ignore_weights: if not isinstance(sp_weights, sparse_tensor.SparseTensor): raise TypeError("sp_weights must be either None or SparseTensor") sp_ids.values.get_shape().assert_is_compatible_with( sp_weights.values.get_shape()) sp_ids.indices.get_shape().assert_is_compatible_with( sp_weights.indices.get_shape()) sp_ids.dense_shape.get_shape().assert_is_compatible_with( sp_weights.dense_shape.get_shape()) # TODO(yleon): Add enhanced node assertions to verify that sp_ids and # sp_weights have equal indices and shapes. with ops.name_scope(name, "embedding_lookup_sparse", params + [sp_ids]) as name: segment_ids = sp_ids.indices[:, 0] if segment_ids.dtype != dtypes.int32: segment_ids = math_ops.cast(segment_ids, dtypes.int32) ids = sp_ids.values if ignore_weights: ids, idx = array_ops.unique(ids) else: idx = None embeddings = embedding_lookup( params, ids, partition_strategy=partition_strategy, max_norm=max_norm) if not ignore_weights: weights = sp_weights.values if weights.dtype != embeddings.dtype: weights = math_ops.cast(weights, embeddings.dtype) # Reshape weights to allow broadcast ones = array_ops.fill( array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1) bcast_weights_shape = array_ops.concat([array_ops.shape(weights), ones], 0) orig_weights_shape = weights.get_shape() weights = array_ops.reshape(weights, bcast_weights_shape) # Set the weight shape, since after reshaping to bcast_weights_shape, # the shape becomes None. if embeddings.get_shape().ndims is not None: weights.set_shape( orig_weights_shape.concatenate( [1 for _ in range(embeddings.get_shape().ndims - 1)])) embeddings *= weights if combiner == "sum": embeddings = math_ops.segment_sum(embeddings, segment_ids, name=name) elif combiner == "mean": embeddings = math_ops.segment_sum(embeddings, segment_ids) weight_sum = math_ops.segment_sum(weights, segment_ids) embeddings = math_ops.div(embeddings, weight_sum, name=name) elif combiner == "sqrtn": embeddings = math_ops.segment_sum(embeddings, segment_ids) weights_squared = math_ops.pow(weights, 2) weight_sum = math_ops.segment_sum(weights_squared, segment_ids) weight_sum_sqrt = math_ops.sqrt(weight_sum) embeddings = math_ops.div(embeddings, weight_sum_sqrt, name=name) else: assert False, "Unrecognized combiner" else: assert idx is not None if combiner == "sum": embeddings = math_ops.sparse_segment_sum( embeddings, idx, segment_ids, name=name) elif combiner == "mean": embeddings = math_ops.sparse_segment_mean( embeddings, idx, segment_ids, name=name) elif combiner == "sqrtn": embeddings = math_ops.sparse_segment_sqrt_n( embeddings, idx, segment_ids, name=name) else: assert False, "Unrecognized combiner" return embeddings
def __init__(self, num_units, input_size=None, activation=tanh): if input_size is not None: logging.warn("%s: The input_size parameter is deprecated.", self) self._num_units = num_units self._activation = activation
def inf_nan_callback(op_type, op_name, attrs, inputs, outputs, check_inf=True, check_nan=True, action=_DEFAULT_CALLBACK_ACTION): """An execution callback that checks for `inf`s and `nan`s in output tensors. This callback can be used with `tfe.add_execute_callback` to check for invalid numeric values. E.g., ```python tfe.add_execute_callback(tfe.inf_nan_callback) ``` Args: op_type: Name of the TFE operation type (e.g., `MatMul`). op_name: Name of the TFE operation. This name is set by client and can be `None` if it unset. attrs: Attributes of the TFE operation, as a tuple of alternating attribute names and attribute values. inputs: The `list` of input tensors to the operation, currently unused by this callback. outputs: The `list` of output tensors from the operation, checked by this callback for `inf` and `nan` values. check_inf: (`bool`) Whether this callback should check for `inf` values in the output tensor values. check_nan: (`bool`) Whether this callback should check for `nan` values in the output tensor values. action: (`str`) Action to be taken by the callback when `inf` or `nan` values are detected. Possible values {"raise", "warn", "print"} `"raise"`: Raise a `InfOrNanError`. `"warn"`: Log a warning using `tf.logging.warn`. `"print"`: Print a message to `sys.stdout`. Raises: InfOrNanError: iff `inf` or `nan` values are seen in any of `outputs` and `action` is `"raise"`. ValueError: iff the value of `action` is invalid. """ del attrs, inputs # Not used. ctx = context.get_default_context() for index, output in enumerate(outputs): if not output.dtype.is_numpy_compatible: continue numpy_dtype = output.dtype.as_numpy_dtype if (np.issubdtype(numpy_dtype, np.float) or np.issubdtype(numpy_dtype, np.complex) or np.issubdtype(numpy_dtype, np.integer)): try: check_numerics_op_attrs = ("message", "Eager-mode inf/nan check", "T", outputs[0].dtype.as_datatype_enum) # TODO (cais): Consider moving this into execute.py. id:3190 gh:3191 # pylint: disable=protected-access pywrap_tensorflow.TFE_Py_Execute(ctx._handle, output.device, "CheckNumerics", [output], check_numerics_op_attrs, 1) # pylint: enable=protected-access except core._NotOkStatusException: # pylint: disable=protected-access value = output.numpy() inf_detected = np.any(np.isinf(value)) and check_inf nan_detected = np.any(np.isnan(value)) and check_nan if not inf_detected and not nan_detected: continue error = InfOrNanError(op_type, op_name, index, len(outputs), value) if action == "print": print("Warning: %s" % str(error)) elif action == "warn": logging.warn(str(error)) elif action == "raise": raise error else: raise ValueError( "Invalid action for inf_nan_callback: %s. Valid actions are: " "{print | warn | raise}" % action)
def build(self, input_shape): """Create variables of the Cudnn RNN. It can be called manually before `__call__()` or automatically through `__call__()`. In the former case, subsequent `__call__()`s will skip creating variables. Args: input_shape: network input tensor shape, a python list or a TensorShape object with 3 dimensions. Raises: ValueError: if input_shape has wrong dimension or unknown 3rd dimension. """ if self.built: return input_shape = tensor_shape.TensorShape(input_shape) if input_shape.ndims != 3: raise ValueError("Expecting input_shape with 3 dims, got %d" % input_shape.ndims) if input_shape[-1].value is None: raise ValueError("The last dimension of the inputs to `CudnnRNN` " "should be defined. Found `None`.") self._input_size = input_shape[-1].value self.input_spec = input_spec.InputSpec(ndim=3, axes={-1: self._input_size}) self._set_scope(None) # Not using base class `add_variable()` since the it calls # `tf.compat.v1.get_variable()` with a callable initializer whereas here # with a tensor. The difference is mandated to support forward-compatibility # with Cudnn. with vs.variable_scope(self._scope, reuse=self.built, custom_getter=self._update_trainable_weights): if self._kernel_initializer is None: self._kernel_initializer = init_ops.glorot_uniform_initializer( seed=self._seed, dtype=self._plain_dtype) if self._bias_initializer is None: self._bias_initializer = init_ops.constant_initializer( 0.0, dtype=self._plain_dtype) weights = [ self._kernel_initializer(sp, dtype=self._plain_dtype) for sp in self.canonical_weight_shapes ] biases = [ self._bias_initializer(sp, dtype=self._plain_dtype) for sp in self.canonical_bias_shapes ] opaque_params_t = self._canonical_to_opaque(weights, biases) if vs.get_variable_scope().partitioner is not None: logging.warn( "Partitioner is not supported for Cudnn RNN layer variables, using " "it will create forward-compatibility issues with future " "CUDA/CuDNN generations.") # Initialize opaque params with a tensor with unknown shape, thus couldn't # use self.add_variable(name, shape, initializer, ...) self.kernel = vs.get_variable("opaque_kernel", dtype=self._plain_dtype, initializer=opaque_params_t, validate_shape=False) # Create saveable in the outer scope of the cudnn subgraph, such that # alternative subgraph with platform-independent rnn cells can load the # checkpoints directly. if not (self.built or vs.get_variable_scope().reuse is True): self._create_saveable() self.built = True
def _graph_mode_decorator(f, args, kwargs): """Implement custom gradient decorator for graph mode.""" # TODO(rsepassi): Add support for kwargs if kwargs: raise ValueError( "The custom_gradient decorator currently supports keywords " "arguments only when eager execution is enabled.") name = generate_name() args = nest.map_structure(ops.convert_to_tensor, args) # Checking global and local variables attempts to ensure that no non-resource # Variables are added to the graph. current_var_scope = variable_scope.get_variable_scope() before_vars = set([ v.ref() for v in current_var_scope.global_variables() + current_var_scope.local_variables() ]) with tape_lib.VariableWatcher() as variable_watcher: result, grad_fn = f(*args) args = nest.flatten(args) flat_result = nest.flatten(result) flat_result_len = len(flat_result) after_vars = set([ v.ref() for v in current_var_scope.global_variables() + current_var_scope.local_variables() ]) new_vars = after_vars - before_vars new_vars_list = [v.deref() for v in new_vars] for v in new_vars_list: if not resource_variable_ops.is_resource_variable(v): raise TypeError( "All variables used by a function wrapped with @custom_gradient must " "be `ResourceVariable`s. Ensure that no `variable_scope` is created " "with `use_resource=False`.") # The variables that grad_fn needs to return gradients for are the set of # variables used that are *not* part of the inputs. variables_in_tape = frozenset( [v.ref() for v in variable_watcher.watched_variables()]) graphs = {getattr(o, "graph", None) for o in flat_result} # Not all results may be tensors. However, we want to ensure all tensor # outputs are from the same graph and get a list of captured inputs for # variable search graphs.discard(None) # Discard non-graph outputs if graphs: if len(graphs) > 1: raise ValueError( "All custom_gradient outputs should be from the same graph") output_graph = graphs.pop() filtered_input_tensors = [] for i in args: if i.graph == output_graph: filtered_input_tensors.append(i) else: filtered_input_tensors = args variables_in_subgraph = frozenset([ v.ref() for v in _get_dependent_variables(input_ops=filtered_input_tensors, output_ops=flat_result) ]) variables = sorted( [v.deref() for v in variables_in_subgraph.union(variables_in_tape)], key=lambda v: v.name) grad_argspec = tf_inspect.getfullargspec(grad_fn) variables_in_signature = ("variables" in grad_argspec.args or "variables" in grad_argspec.kwonlyargs or grad_argspec.varkw) if variables and not variables_in_signature: raise TypeError( "@tf.custom_gradient grad_fn must accept keyword argument 'variables', " "since function uses variables: {}".format(variables)) if variables_in_signature and not variables: # User seems to intend to use variables but none were captured. logging.warn( "@custom_gradient grad_fn has 'variables' in signature, but " "no ResourceVariables were used on the forward pass.") all_tensors = flat_result + args + variables def tape_grad_fn(*result_grads): """Custom grad fn wrapper.""" result_grads = result_grads[:flat_result_len] if variables: input_grads, variable_grads = grad_fn(*result_grads, variables=variables) if len(variable_grads) != len(variables): raise ValueError("Must return gradient for each variable from " "@custom_gradient grad_fn.") else: input_grads = grad_fn(*result_grads) variable_grads = [] # Need to return one value per input to the IdentityN, so pad the # gradients of the inputs of the custom_gradient function with the # gradients of the outputs as well. input_grads = nest.flatten(input_grads) return ([None] * flat_result_len) + input_grads + variable_grads @ops.RegisterGradient(name) def internal_grad_fn(unused_op, *result_grads): # pylint: disable=unused-variable """Custom grad fn wrapper.""" return tape_grad_fn(*result_grads) original_tensors = all_tensors with ops.get_default_graph().gradient_override_map({"IdentityN": name}): all_tensors = array_ops.identity_n(all_tensors) original_tensors = [ops.convert_to_tensor(x) for x in original_tensors] # Propagate handle data for happier shape inference for resource variables. for i, t in enumerate(original_tensors): if t.dtype == dtypes.resource and hasattr(t, "_handle_data"): all_tensors[i]._handle_data = t._handle_data # pylint: disable=protected-access tape_lib.record_operation(f.__name__, all_tensors, original_tensors, tape_grad_fn) for ot, t in zip(original_tensors, all_tensors): handle_data_util.copy_handle_data(ot, t) return nest.pack_sequence_as(structure=result, flat_sequence=all_tensors[:flat_result_len])
def create_partitioned_variables(shape, slicing, initializer, dtype=dtypes.float32, trainable=True, collections=None, name=None, reuse=None): """Create a list of partitioned variables according to the given `slicing`. Currently only one dimension of the full variable can be sliced, and the full variable can be reconstructed by the concatenation of the returned list along that dimension. Args: shape: List of integers. The shape of the full variable. slicing: List of integers. How to partition the variable. Must be of the same length as `shape`. Each value indicate how many slices to create in the corresponding dimension. Presently only one of the values can be more than 1; that is, the variable can only be sliced along one dimension. For convenience, The requested number of partitions does not have to divide the corresponding dimension evenly. If it does not, the shapes of the partitions are incremented by 1 starting from partition 0 until all slack is absorbed. The adjustment rules may change in the future, but as you can save/restore these variables with different slicing specifications this should not be a problem. initializer: A `Tensor` of shape `shape` or a variable initializer function. If a function, it will be called once for each slice, passing the shape and data type of the slice as parameters. The function must return a tensor with the same shape as the slice. dtype: Type of the variables. Ignored if `initializer` is a `Tensor`. trainable: If True also add all the variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. collections: List of graph collections keys to add the variables to. Defaults to `[GraphKeys.VARIABLES]`. name: Optional name for the full variable. Defaults to `"PartitionedVariable"` and gets uniquified automatically. reuse: Boolean or `None`; if `True` and name is set, it would reuse previously created variables. if `False` it will create new variables. if `None`, it would inherit the parent scope reuse. Returns: A list of Variables corresponding to the slicing. Raises: ValueError: If any of the arguments is malformed. """ logging.warn("create_partitioned_variables is deprecated. Use " "tf.get_variable with a partitioner set, or " "tf.get_partitioned_variable_list, instead.") if len(shape) != len(slicing): raise ValueError("The 'shape' and 'slicing' of a partitioned Variable " "must have the length: shape: %s, slicing: %s" % (shape, slicing)) if len(shape) < 1: raise ValueError("A partitioned Variable must have rank at least 1: " "shape: %s" % shape) # Legacy: we are provided the slicing directly, so just pass it to # the partitioner. partitioner = lambda **unused_kwargs: slicing with variable_scope.variable_op_scope([], name, "PartitionedVariable", reuse=reuse) as scope: # pylint: disable=protected-access vs, _ = variable_scope._get_partitioned_variable_list( name="part", shape=shape, dtype=dtype, initializer=initializer, trainable=trainable, partitioner=partitioner, collections=collections) for var in vs: var._save_slice_info.full_name = scope.name # pylint: enable=protected-access return vs
def einsum(equation, *inputs): """ A generalized contraction between tensors of arbitrary dimension. This function returns a tensor whose elements are defined by `equation`, which is written in a shorthand form inspired by the Einstein summation convention. As an example, consider multiplying two matrices A and B to form a matrix C. The elements of C are given by: ``` C[i,k] = sum_j A[i,j] * B[j,k] ``` The corresponding `equation` is: ``` ij,jk->ik ``` In general, the `equation` is obtained from the more familiar element-wise equation by 1. removing variable names, brackets, and commas, 2. replacing "*" with ",", 3. dropping summation signs, and 4. moving the output to the right, and replacing "=" with "->". Many common operations can be expressed in this way. For example: ```python # Matrix multiplication >>> einsum('ij,jk->ik', m0, m1) # output[i,k] = sum_j m0[i,j] * m1[j, k] # Dot product >>> einsum('i,i->', u, v) # output = sum_i u[i]*v[i] # Outer product >>> einsum('i,j->ij', u, v) # output[i,j] = u[i]*v[j] # Transpose >>> einsum('ij->ji', m) # output[j,i] = m[i,j] # Batch matrix multiplication >>> einsum('aij,ajk->aik', s, t) # out[a,i,k] = sum_j s[a,i,j] * t[a, j, k] ``` This function behaves like `numpy.einsum`, but does not support: * Ellipses (subscripts like `ij...,jk...->ik...`) * Subscripts where an axis appears more than once for a single input (e.g. `ijj,k->ik`). * Subscripts that are summed across multiple inputs (e.g., `ij,ij,jk->ik`). Args: equation: a `str` describing the contraction, in the same format as `numpy.einsum`. inputs: the inputs to contract (each one a `Tensor`), whose shapes should be consistent with `equation`. Returns: The contracted `Tensor`, with shape determined by `equation`. Raises: ValueError: If - the format of `equation` is incorrect, - the number of inputs implied by `equation` does not match `len(inputs)`, - an axis appears in the output subscripts but not in any of the inputs, - the number of dimensions of an input differs from the number of indices in its subscript, or - the input shapes are inconsistent along a particular axis. """ if '...' in equation: raise ValueError("Subscripts with ellipses are not yet supported.") match = re.match('([a-z,]+)(->[a-z]*)?', equation) if not match: raise ValueError( 'Indices have incorrect format: %s' % equation ) inputs = list(inputs) input_axis_labels = match.group(1).split(',') if len(inputs) != len(input_axis_labels): raise ValueError('Got %d arguments for equation "%s", expecting %d' % ( len(inputs), equation, len(input_axis_labels))) axis_labels = set(''.join(input_axis_labels)) if match.group(2): output_axis_labels = match.group(2)[2:] else: # infer the output subscripts if not given, assume alphabetical order indices = ''.join(sorted(axis_labels)) counts = {ax: 0 for ax in indices} for axes_ in input_axis_labels: for ax in axes_: counts[ax] += 1 output_axis_labels = ''.join(sorted( ax for ax in indices if counts[ax] == 1 )) for a in axis_labels: input_count = sum(1 for s in input_axis_labels if a in s) if input_count > 2 and a not in output_axis_labels: logging.warn( 'Falling back to exponential-space implementation of einsum() because' ' index "%s" is summed over more than two inputs.', a) return _exponential_space_einsum(equation, *inputs) temp = inputs[0] temp_axis_labels = input_axis_labels[0] for i in xrange(len(inputs)-1): axes_to_sum = (set(temp_axis_labels) & set(input_axis_labels[i+1]) - set(output_axis_labels)) temp, temp_axis_labels = _einsum_reduction(temp, temp_axis_labels, inputs[i+1], input_axis_labels[i+1], axes_to_sum) missing_indices = set(temp_axis_labels) - set(output_axis_labels) if missing_indices: reduction_indices = [i for i, a in enumerate(temp_axis_labels) if a not in output_axis_labels] temp = math_ops.reduce_sum(temp, reduction_indices=reduction_indices) temp_axis_labels = ''.join(a for a in temp_axis_labels if a in output_axis_labels) if sorted(temp_axis_labels) != sorted(output_axis_labels): raise ValueError('Invalid equation: %s' % equation) perm = [temp_axis_labels.index(a) for a in output_axis_labels] return _transpose_if_necessary(temp, perm)
def main(unused_argv=None): logging.set_verbosity(logging.INFO) tf_version = versions.__version__ print('TensorFlow version %s detected' % tf_version) print('Welcome to the Cloud TPU Profiler v%s' % profiler_version.__version__) if LooseVersion(tf_version) < LooseVersion('2.2.0'): sys.exit('You must install tensorflow >= 2.2.0 to use this plugin.') if not FLAGS.service_addr and not FLAGS.tpu: sys.exit('You must specify either --service_addr or --tpu.') tpu_cluster_resolver = None if FLAGS.service_addr: if FLAGS.tpu: logging.warn('Both --service_addr and --tpu are set. Ignoring ' '--tpu and using --service_addr.') service_addr = FLAGS.service_addr else: try: tpu_cluster_resolver = (resolver.TPUClusterResolver( [FLAGS.tpu], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)) service_addr = tpu_cluster_resolver.get_master() except (ValueError, TypeError): sys.exit( 'Failed to find TPU %s in zone %s project %s. You may use ' '--tpu_zone and --gcp_project to specify the zone and project of' ' your TPU.' % (FLAGS.tpu, FLAGS.tpu_zone, FLAGS.gcp_project)) service_addr = service_addr.replace('grpc://', '').replace(':8470', ':8466') workers_list = '' if FLAGS.workers_list is not None: workers_list = FLAGS.workers_list elif tpu_cluster_resolver is not None: workers_list = get_workers_list(tpu_cluster_resolver) # If profiling duration was not set by user or set to a non-positive value, # we set it to a default value of 1000ms. duration_ms = FLAGS.duration_ms if FLAGS.duration_ms > 0 else 1000 if FLAGS.monitoring_level > 0: print('Since monitoring level is provided, profile', service_addr, ' for ', FLAGS.duration_ms, ' ms and show metrics for ', FLAGS.num_queries, ' time(s).') monitoring_helper(service_addr, duration_ms, FLAGS.monitoring_level, FLAGS.num_queries) else: if not FLAGS.logdir: sys.exit('You must specify either --logdir or --monitoring_level.') if not gfile.Exists(FLAGS.logdir): gfile.MakeDirs(FLAGS.logdir) try: if LooseVersion(tf_version) < LooseVersion('2.3.0'): profiler_client.trace(service_addr, os.path.expanduser(FLAGS.logdir), duration_ms, workers_list, FLAGS.num_tracing_attempts) else: options = profiler.ProfilerOptions( host_tracer_level=FLAGS.host_tracer_level) profiler_client.trace(service_addr, os.path.expanduser(FLAGS.logdir), duration_ms, workers_list, FLAGS.num_tracing_attempts, options) except errors.UnavailableError: sys.exit(0)
def __init__(self, input_saved_model_dir=None, input_saved_model_tags=None, input_saved_model_signature_key=None, input_graph_def=None, nodes_blacklist=None, session_config=None, max_batch_size=1, max_workspace_size_bytes=DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES, precision_mode=TrtPrecisionMode.FP32, minimum_segment_size=3, is_dynamic_op=False, maximum_cached_engines=1, use_calibration=True): """Initialize the converter. Args: input_saved_model_dir: the directory to load the SavedModel which contains the input graph to transforms. Used only when input_graph_def is None. input_saved_model_tags: list of tags to load the SavedModel. input_saved_model_signature_key: the key of the signature to optimize the graph for. input_graph_def: a GraphDef object containing a model to be transformed. If set to None, the graph will be read from the SavedModel loaded from input_saved_model_dir. nodes_blacklist: list of node names to prevent the converter from touching. session_config: the ConfigProto used to create a Session. It's also used as a template to create a TRT-enabled ConfigProto for conversion. If not specified, a default ConfigProto will be used. max_batch_size: max size for the input batch. max_workspace_size_bytes: the maximum GPU temporary memory which the TRT engine can use at execution time. This corresponds to the 'workspaceSize' parameter of nvinfer1::IBuilder::setMaxWorkspaceSize(). precision_mode: one of TrtPrecisionMode.supported_precision_modes(). minimum_segment_size: the minimum number of nodes required for a subgraph to be replaced by TRTEngineOp. is_dynamic_op: whether to generate dynamic TRT ops which will build the TRT network and engine at run time. maximum_cached_engines: max number of cached TRT engines in dynamic TRT ops. If the number of cached engines is already at max but none of them can serve the input, the TRTEngineOp will fall back to run the TF function based on which the TRTEngineOp is created. use_calibration: this argument is ignored if precision_mode is not INT8. If set to True, a calibration graph will be created to calibrate the missing ranges. The calibration graph must be converted to an inference graph by running calibration with calibrate(). If set to False, quantization nodes will be expected for every tensor in the graph (exlcuding those which will be fused). If a range is missing, an error will occur. Please note that accuracy may be negatively affected if there is a mismatch between which tensors TRT quantizes and which tensors were trained with fake quantization. Raises: ValueError: if the combination of the parameters is invalid. RuntimeError: if this class is used in TF 2.0. """ if context.executing_eagerly(): raise RuntimeError("Please use TrtGraphConverterV2 in TF 2.0.") if input_graph_def and input_saved_model_dir: raise ValueError( "Can only specify one of input_graph_def and input_saved_model_dir" ) if not input_graph_def and not input_saved_model_dir: raise ValueError("Must specify one of input_graph_def and " "input_saved_model_dir") _check_trt_version_compatibility() self._input_graph_def = input_graph_def self._nodes_blacklist = nodes_blacklist self._input_saved_model_dir = input_saved_model_dir self._converted = False self._grappler_meta_graph_def = None self._input_saved_model_tags = (input_saved_model_tags or [tag_constants.SERVING]) self._input_saved_model_signature_key = ( input_saved_model_signature_key or signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY) self._session_config = session_config or config_pb2.ConfigProto() # For calibration usage. self._calibration_graph = None self._calibration_data_collected = False self._need_calibration = (precision_mode == TrtPrecisionMode.INT8 and use_calibration) if self._need_calibration and not is_dynamic_op: tf_logging.warn( "INT8 precision mode with calibration is supported with " "dynamic TRT ops only. Disregarding is_dynamic_op parameter.") is_dynamic_op = True # TODO(laigd): # - Verify in int8 mode that maximum_cached_engines is set properly. # - If it fails to build the int8 engine it should return error. rewriter_config_template = None if (session_config and session_config.HasField("graph_options") and session_config.graph_options.HasField("rewrite_options")): rewriter_config_template = session_config.graph_options.rewrite_options self._conversion_params = TrtConversionParams( rewriter_config_template=rewriter_config_template, max_workspace_size_bytes=max_workspace_size_bytes, precision_mode=precision_mode, minimum_segment_size=minimum_segment_size, is_dynamic_op=is_dynamic_op, maximum_cached_engines=maximum_cached_engines, use_calibration=use_calibration, max_batch_size=max_batch_size) _check_conversion_params(self._conversion_params)
def _check_dtype(dtype): if dtypes.as_dtype(dtype) == dtypes.float64: logging.warn( 'float64 is not supported by many models, consider casting to float32.' ) return dtype
def scattered_embedding_lookup_sparse(params, sparse_values, dimension, combiner=None, default_value=None, name=None, hash_key=None): """Looks up embeddings of a sparse feature using parameter hashing. See `tf.contrib.layers.scattered_embedding_lookup` for embedding with hashing. Args: params: A `Tensor`, `list` of `Tensors`, or `PartitionedVariable`. Each tensor must be of rank 1 with fully-defined shape. sparse_values: A 2-D `SparseTensor` containing the values to be embedded. Some rows may be empty. dimension: Embedding dimension combiner: A string specifying how to combine embedding results for each entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean" the default. default_value: The value to use for an entry with no features. name: An optional name for this op. hash_key: Specify the hash_key that will be used by the `FingerprintCat64` function to combine the crosses fingerprints on SparseFeatureCrossOp (optional). Returns: Dense tensor with shape [N, dimension] with N the number of rows in sparse_values. Raises: TypeError: If sparse_values is not a SparseTensor. ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}. """ if combiner is None: logging.warn("The default value of combiner will change from \"mean\" " "to \"sqrtn\" after 2016/11/01.") combiner = "mean" if isinstance(params, variables.PartitionedVariable): params = list(params) if not isinstance(params, list): params = [params] if not isinstance(sparse_values, sparse_tensor.SparseTensor): raise TypeError("sparse_values must be SparseTensor") with ops.name_scope(name, "scattered_embedding_lookup_sparse", params + [sparse_values]) as scope: # Fill in the empty rows. if default_value is None: # Random default values to reduce the risk of collision. if sparse_values.dtype == dtypes.string: default_value = "6ZxWzWOHxZ" else: default_value = 1288896567 sparse_values, _ = sparse_ops.sparse_fill_empty_rows( sparse_values, default_value) segment_ids = sparse_values.indices[:, 0] if segment_ids.dtype != dtypes.int32: segment_ids = math_ops.cast(segment_ids, dtypes.int32) values = sparse_values.values values, idx = array_ops.unique(values) embeddings = scattered_embedding_lookup( params, values, dimension, hash_key=hash_key) if combiner == "sum": embeddings = math_ops.sparse_segment_sum( embeddings, idx, segment_ids, name=scope) elif combiner == "mean": embeddings = math_ops.sparse_segment_mean( embeddings, idx, segment_ids, name=scope) elif combiner == "sqrtn": embeddings = math_ops.sparse_segment_sqrt_n( embeddings, idx, segment_ids, name=scope) else: raise ValueError("Combiner must be one of 'mean', 'sqrtn' or 'sum'.") return embeddings
def __init__(self, num_units, use_peepholes=False, cell_clip=None, initializer=None, num_proj=None, proj_clip=None, num_unit_shards=None, num_proj_shards=None, forget_bias=1.0, state_is_tuple=True, activation=None, reuse=None, name=None, dtype=None): """Initialize the parameters for an LSTM cell. Args: num_units: int, The number of units in the LSTM cell. use_peepholes: bool, set True to enable diagonal/peephole connections. cell_clip: (optional) A float value, if provided the cell state is clipped by this value prior to the cell output activation. initializer: (optional) The initializer to use for the weight and projection matrices. num_proj: (optional) int, The output dimensionality for the projection matrices. If None, no projection is performed. proj_clip: (optional) A float value. If `num_proj > 0` and `proj_clip` is provided, then the projected values are clipped elementwise to within `[-proj_clip, proj_clip]`. num_unit_shards: Deprecated, will be removed by Jan. 2017. Use a variable_scope partitioner instead. num_proj_shards: Deprecated, will be removed by Jan. 2017. Use a variable_scope partitioner instead. forget_bias: Biases of the forget gate are initialized by default to 1 in order to reduce the scale of forgetting at the beginning of the training. Must set it manually to `0.0` when restoring from CudnnLSTM trained checkpoints. state_is_tuple: If True, accepted and returned states are 2-tuples of the `c_state` and `m_state`. If False, they are concatenated along the column axis. This latter behavior will soon be deprecated. activation: Activation function of the inner states. Default: `tanh`. reuse: (optional) Python boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. name: String, the name of the layer. Layers with the same name will share weights, but to avoid mistakes we require reuse=True in such cases. dtype: Default dtype of the layer (default of `None` means use the type of the first input). Required when `build` is called before `call`. When restoring from CudnnLSTM-trained checkpoints, use `CudnnCompatibleLSTMCell` instead. """ super(TFLiteLSTMCell, self).__init__(_reuse=reuse, name=name, dtype=dtype) # TODO(raziel): decide if we want to just support tuples (yes please!). if not state_is_tuple: logging.warn( "%s: Using a concatenated state is slower and will soon be " "deprecated. Use state_is_tuple=True.", self) if num_unit_shards is not None or num_proj_shards is not None: logging.warn( "%s: The num_unit_shards and proj_unit_shards parameters are " "deprecated and will be removed in Jan 2017. " "Use a variable scope with a partitioner instead.", self) # Inputs must be 2-dimensional. # TODO(raziel): layers stuff -- chop if un-layerizing Op. self.input_spec = base_layer.InputSpec(ndim=2) self._tflite_wrapper = op_hint.OpHint("UnidirectionalSequenceLstm") self._num_units = num_units self._use_peepholes = use_peepholes self._cell_clip = cell_clip self._initializer = initializer self._num_proj = num_proj self._proj_clip = proj_clip self._num_unit_shards = num_unit_shards self._num_proj_shards = num_proj_shards self._forget_bias = forget_bias self._state_is_tuple = state_is_tuple self._activation = activation or math_ops.tanh self._output_size = num_proj if num_proj else num_units self._state_size = ( rnn_cell_impl.LSTMStateTuple(num_units, self._output_size) if state_is_tuple else num_units + self._output_size)
def __init__(self, estimator, train_input_fn, eval_input_fn, eval_metrics=None, train_steps=None, eval_steps=100, train_monitors=None, eval_hooks=None, local_eval_frequency=None, eval_delay_secs=120, continuous_eval_throttle_secs=60, min_eval_frequency=None, delay_workers_by_global_step=False, export_strategies=None, train_steps_per_iteration=None, checkpoint_and_export=False, saving_listeners=None, check_interval_secs=5): """Constructor for `Experiment`. Creates an Experiment instance. None of the functions passed to this constructor are executed at construction time. They are stored and used when a method is executed which requires it. Args: estimator: Object implementing Estimator interface, which could be a combination of @{tf.contrib.learn.Trainable} and @{tf.contrib.learn.Evaluable} (deprecated), or @{tf.estimator.Estimator}. train_input_fn: function, returns features and labels for training. eval_input_fn: function, returns features and labels for evaluation. If `eval_steps` is `None`, this should be configured only to produce for a finite number of batches (generally, 1 epoch over the evaluation data). eval_metrics: `dict` of string, metric function. If `None`, default set is used. This should be `None` if the `estimator` is @{tf.estimator.Estimator}. If metrics are provided they will be *appended* to the default set. train_steps: Perform this many steps of training. `None`, the default, means train forever. eval_steps: `evaluate` runs until input is exhausted (or another exception is raised), or for `eval_steps` steps, if specified. train_monitors: A list of monitors to pass to the `Estimator`'s `fit` function. eval_hooks: A list of `SessionRunHook` hooks to pass to the `Estimator`'s `evaluate` function. local_eval_frequency: (applies only to local_run) Frequency of running eval in steps. If `None`, runs evaluation only at the end of training. eval_delay_secs: Start evaluating after waiting for this many seconds. continuous_eval_throttle_secs: Do not re-evaluate unless the last evaluation was started at least this many seconds ago for continuous_eval(). min_eval_frequency: (applies only to train_and_evaluate). the minimum number of steps between evaluations. Of course, evaluation does not occur if no new snapshot is available, hence, this is the minimum. If 0, the evaluation will only happen after training. If None, defaults to 1. To avoid checking for new checkpoints too frequent, the interval is further limited to be at least check_interval_secs between checks. delay_workers_by_global_step: if `True` delays training workers based on global step instead of time. export_strategies: Iterable of `ExportStrategy`s, or a single one, or `None`. train_steps_per_iteration: (applies only to continuous_train_and_eval). Perform this many (integer) number of train steps for each training-evaluation iteration. With a small value, the model will be evaluated more frequently with more checkpoints saved. If `None`, will use a default value (which is smaller than `train_steps` if provided). checkpoint_and_export: (applies only to train_and_evaluate). If `True`, performs intermediate model checkpoints and exports during the training process, rather than only once model training is complete. This parameter is experimental and may be changed or removed in the future. Setting this parameter leads to the following: the value of `min_eval_frequency` will be ignored, and the number of steps between evaluations and exports will instead be determined by the Estimator configuration parameters `save_checkpoints_secs` and `save_checkpoints_steps`. Also, this parameter leads to the creation of a default `CheckpointSaverHook` instead of a `ValidationMonitor`, so the provided `train_monitors` will need to be adjusted accordingly. saving_listeners: list of `CheckpointSaverListener` objects. Used by tf.estimator.Estimator for callbacks that run immediately before or after checkpoint savings. check_interval_secs: Minimum time between subsequent checks for a new checkpoint. This mostly applies if both min_eval_frequency and the time spent per training step is low. Raises: ValueError: if `estimator` does not implement Estimator interface, or if export_strategies has the wrong type. """ if isinstance(estimator, core_estimator.Estimator): self._core_estimator_used = True if eval_metrics is not None: raise ValueError( "`eval_metrics` must be `None` with `tf.estimator.Estimator`. " "Use `eval_metric_ops` in `tf.estimator.EstimatorSpec` instead." ) else: self._core_estimator_used = False if not isinstance(estimator, evaluable.Evaluable): raise ValueError( "`estimator` must implement `tf.contrib.learn.Evaluable` " "or `tf.estimator.Estimator`.") if not isinstance(estimator, trainable.Trainable): raise ValueError( "`estimator` must implement `tf.contrib.learn.Trainable`" "or `tf.estimator.`Estimator`.") if saving_listeners is not None: raise ValueError("`saving_listeners` must be `None` with " "`tf.contrib.learn.Estimator`.") if isinstance(estimator, tpu_estimator.TPUEstimator): logging.warn( "`Experiment` class cannot work with `tf.contrib.tpu.TPUEstimator`. " "Please call `TPUEstimator` train/evaluate directly. \n" "Details: `Experiment` class is designed for between-graph " "distributed training, while `TPUEstimator` is working in in-graph " "distributed mode. Use with care.") super(Experiment, self).__init__() # Immutable fields. self._estimator = estimator self._train_input_fn = train_input_fn self._eval_input_fn = eval_input_fn self._eval_metrics = eval_metrics self._train_steps = train_steps self._eval_steps = eval_steps self._local_eval_frequency = local_eval_frequency self._eval_delay_secs = eval_delay_secs self._continuous_eval_throttle_secs = continuous_eval_throttle_secs self._checkpoint_and_export = checkpoint_and_export self._saving_listeners = saving_listeners self._min_eval_frequency = min_eval_frequency if (min_eval_frequency is not None) else 1 self._check_interval_secs = check_interval_secs self._delay_workers_by_global_step = delay_workers_by_global_step self._train_monitors = train_monitors[:] if train_monitors else [] self._eval_hooks = eval_hooks[:] if eval_hooks else [] self._set_export_strategies(export_strategies) self._train_steps_per_iteration = train_steps_per_iteration if (self._train_steps_per_iteration is not None and not isinstance(self._train_steps_per_iteration, int)): raise ValueError("`train_steps_per_iteration` must be an integer.")
def _add_children_recreated_from_config(self, obj, proto, node_id): """Recursively records objects recreated from config.""" # pylint: disable=protected-access if node_id in self._traversed_nodes_from_config: return self._traversed_nodes_from_config.append(node_id) obj._maybe_initialize_trackable() if isinstance(obj, base_layer.Layer) and not obj.built: metadata = json_utils.decode(proto.user_object.metadata) self._try_build_layer(obj, node_id, metadata.get('build_input_shape')) # Create list of all possible children children = [] # Look for direct children for reference in proto.children: obj_child = obj._lookup_dependency(reference.local_name) children.append((obj_child, reference.node_id)) # Add metrics that may have been added to the layer._metrics list. # This is stored in the SavedModel as layer.keras_api.layer_metrics in # SavedModels created after Tf 2.2. metric_list_node_id = self._search_for_child_node( node_id, [constants.KERAS_ATTR, 'layer_metrics'], raise_error=False) if metric_list_node_id is not None and hasattr(obj, '_metrics'): obj_metrics = {m.name: m for m in obj._metrics} for reference in self._proto.nodes[metric_list_node_id].children: metric = obj_metrics.get(reference.local_name) if metric is not None: children.append((metric, reference.node_id)) for (obj_child, child_id) in children: child_proto = self._proto.nodes[child_id] if not isinstance(obj_child, trackable.Trackable): continue if (child_proto.user_object.identifier in revived_types.registered_identifiers()): setter = revived_types.get_setter(child_proto.user_object) elif obj_child._object_identifier in KERAS_OBJECT_IDENTIFIERS: setter = _revive_setter else: setter = setattr # pylint: enable=protected-access if (child_id in self._nodes_recreated_from_config and self._nodes_recreated_from_config[child_id][0] is not obj_child): # This means that the same trackable object is referenced by two # different objects that were recreated from the config. logging.warn( 'Looks like there is an object (perhaps variable or layer)' ' that is shared between different layers/models. This ' 'may cause issues when restoring the variable values.' 'Object: {}'.format(obj_child)) self._nodes_recreated_from_config[child_id] = ( obj_child, self._config_node_setter(setter)) self._all_nodes_recreated_from_config.add(obj_child) self._add_children_recreated_from_config(obj_child, child_proto, child_id)
def _graph_mode_decorator(f, args, kwargs): """Implement custom gradient decorator for graph mode.""" # TODO(rsepassi): Add support for kwargs if kwargs: raise ValueError( "The custom_gradient decorator currently supports keywords " "arguments only when eager execution is enabled.") name = "CustomGradient-%s" % ops.uid() args = [ops.convert_to_tensor(x) for x in args] # Checking global and local variables attempts to ensure that no non-resource # Variables are added to the graph. current_var_scope = variable_scope.get_variable_scope() before_vars = set([ v.experimental_ref() for v in current_var_scope.global_variables() + current_var_scope.local_variables() ]) with backprop.GradientTape() as tape: result, grad_fn = f(*args) after_vars = set([ v.experimental_ref() for v in current_var_scope.global_variables() + current_var_scope.local_variables() ]) new_vars = after_vars - before_vars new_vars_list = [v.deref() for v in new_vars] for v in new_vars_list: if not resource_variable_ops.is_resource_variable(v): raise TypeError( "All variables used by a function wrapped with @custom_gradient must " "be `ResourceVariable`s. Ensure that no `variable_scope` is created " "with `use_resource=False`.") # The variables that grad_fn needs to return gradients for are the set of # variables used that are *not* part of the inputs. inputs = args variables_in_tape = frozenset( [v.experimental_ref() for v in tape.watched_variables()]) - frozenset(v.experimental_ref() for v in inputs) variables_in_subgraph = frozenset([ v.experimental_ref() for v in get_dependent_variables(input_ops=inputs, output_ops=result) ]) variables = list( [v.deref() for v in variables_in_subgraph.union(variables_in_tape)]) grad_argspec = tf_inspect.getfullargspec(grad_fn) variables_in_signature = ("variables" in grad_argspec.args or grad_argspec.varkw) if variables and not variables_in_signature: raise TypeError("If using @custom_gradient with a function that " "uses variables, then grad_fn must accept a keyword " "argument 'variables'.") if variables_in_signature and not variables: # User seems to intend to use variables but none were captured. if not variable_scope.get_variable_scope().use_resource: raise TypeError( "If using @custom_gradient with a function that " "uses variables, the enclosing variable scope must " "have use_resource=True.") else: logging.warn( "@custom_gradient grad_fn has 'variables' in signature, but " "no ResourceVariables were used on the forward pass.") flat_result = nest.flatten(result) flat_result_len = len(flat_result) all_tensors = flat_result + args + variables def tape_grad_fn(*result_grads): """Custom grad fn wrapper.""" result_grads = result_grads[:flat_result_len] if variables: input_grads, variable_grads = grad_fn(*result_grads, variables=variables) if len(variable_grads) != len(variables): raise ValueError("Must return gradient for each variable from " "@custom_gradient grad_fn.") else: input_grads = grad_fn(*result_grads) variable_grads = [] # Need to return one value per input to the IdentityN, so pad the # gradients of the inputs of the custom_gradient function with the # gradients of the outputs as well. input_grads = nest.flatten(input_grads) return ([None] * flat_result_len) + input_grads + variable_grads @ops.RegisterGradient(name) def internal_grad_fn(unused_op, *result_grads): # pylint: disable=unused-variable """Custom grad fn wrapper.""" return tape_grad_fn(*result_grads) original_tensors = all_tensors with ops.get_default_graph().gradient_override_map({"IdentityN": name}): all_tensors = array_ops.identity_n(all_tensors) original_tensors = [ops.convert_to_tensor(x) for x in original_tensors] # Propagate handle data for happier shape inference for resource variables. for i, t in enumerate(original_tensors): if t.dtype == dtypes.resource and hasattr(t, "_handle_data"): all_tensors[i]._handle_data = t._handle_data # pylint: disable=protected-access tape_lib.record_operation(f.__name__, all_tensors, original_tensors, tape_grad_fn) for ot, t in zip(original_tensors, all_tensors): copy_handle_data(ot, t) return nest.pack_sequence_as(structure=result, flat_sequence=all_tensors[:flat_result_len])
def embedding_lookup_sparse_with_distributed_aggregation( params, sp_ids, sp_weights, partition_strategy="mod", name=None, combiner=None, max_norm=None): """Computes embeddings for the given ids and weights. Embeddings belonging to same param are aggregated on that device first. This op is intended to decrease data transmission and improve parallelism. See `tf.nn.embedding_lookup_sparse` for the functionality and example of this op. Args: params: A single tensor representing the complete embedding tensor, or a list of P tensors all of same shape except for the first dimension, representing sharded embedding tensors. Alternatively, a `PartitionedVariable`, created by partitioning along dimension 0. Each element must be appropriately sized for the given `partition_strategy`. sp_ids: N x M SparseTensor of int64 ids (typically from FeatureValueToId), where N is typically batch size and M is arbitrary. sp_weights: either a SparseTensor of float / double weights, or None to indicate all weights should be taken to be 1. If specified, sp_weights must have exactly the same shape and indices as sp_ids. partition_strategy: A string specifying the partitioning strategy, relevant if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. name: Optional name for the op. combiner: A string specifying the reduction op. Currently "mean", "sqrtn" and "sum" are supported. "sum" computes the weighted sum of the embedding results for each row. "mean" is the weighted sum divided by the total weight. "sqrtn" is the weighted sum divided by the square root of the sum of the squares of the weights. max_norm: If not None, each embedding is normalized to have l2 norm equal to max_norm before combining. Returns: A dense tensor representing the combined embeddings for the sparse ids. For each row in the dense tensor represented by sp_ids, the op looks up the embeddings for all ids in that row, multiplies them by the corresponding weight, and combines these embeddings as specified. Raises: TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither None nor SparseTensor. ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}. """ if combiner is None: logging.warn("The default value of combiner will change from \"mean\" " "to \"sqrtn\" after 2016/11/01.") combiner = "mean" if combiner not in ("mean", "sqrtn", "sum"): raise ValueError("combiner must be one of 'mean', 'sqrtn' or 'sum'") if isinstance(params, variables.PartitionedVariable): params = list(params) # Iterate to get the underlying Variables. if not isinstance(params, list): params = [params] if not isinstance(sp_ids, sparse_tensor.SparseTensor): raise TypeError("sp_ids must be SparseTensor") ignore_weights = sp_weights is None if not ignore_weights: if not isinstance(sp_weights, sparse_tensor.SparseTensor): raise TypeError("sp_weights must be either None or SparseTensor") sp_ids.values.get_shape().assert_is_compatible_with( sp_weights.values.get_shape()) sp_ids.indices.get_shape().assert_is_compatible_with( sp_weights.indices.get_shape()) sp_ids.dense_shape.get_shape().assert_is_compatible_with( sp_weights.dense_shape.get_shape()) # TODO(yleon): Add enhanced node assertions to verify that sp_ids and # sp_weights have equal indices and shapes. with ops.name_scope(name, "embedding_lookup_sparse", params + [sp_ids]) as name: segment_ids = sp_ids.indices[:, 0] if segment_ids.dtype != dtypes.int32: segment_ids = math_ops.cast(segment_ids, dtypes.int32) ids = sp_ids.values if ignore_weights: ids, idx = array_ops.unique(ids) else: idx = None weights = None if ignore_weights else sp_weights.values embeddings = _embedding_lookup_with_distributed_aggregation( params, ids, partition_strategy=partition_strategy, max_norm=max_norm, weights=weights, idx=idx, segment_ids=segment_ids) # Set weights to all one if ignore weights. if ignore_weights: weights = array_ops.fill([array_ops.shape(segment_ids)[0]], 1) if weights.dtype != embeddings.dtype: weights = math_ops.cast(weights, embeddings.dtype) # Reshape weights. ones = array_ops.fill( array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1) bcast_weights_shape = array_ops.concat([array_ops.shape(weights), ones], 0) orig_weights_shape = weights.get_shape() weights = array_ops.reshape(weights, bcast_weights_shape) if embeddings.get_shape().ndims is not None: weights.set_shape( orig_weights_shape.concatenate( [1 for _ in range(embeddings.get_shape().ndims - 1)])) if combiner == "mean": weight_sum = math_ops.segment_sum(weights, segment_ids) embeddings = math_ops.div(embeddings, weight_sum) elif combiner == "sqrtn": weights_squared = math_ops.pow(weights, 2) weight_sum = math_ops.segment_sum(weights_squared, segment_ids) weight_sum_sqrt = math_ops.sqrt(weight_sum) embeddings = math_ops.div(embeddings, weight_sum_sqrt) elif combiner != "sum": assert False, "Unrecognized combiner" return embeddings
def testRecursiveCriticalSectionAccessWithinLoopIsProtected(self): cs = critical_section_ops.CriticalSection(shared_name="cs") def body_implicit_capture(i, j): # This would have caused a deadlock if not for logic in execute # that inserts additional control dependencies onto the lock op: # * Loop body argument j is captured by fn() # * i is running in parallel to move forward the execution # * j is not being checked by the predicate function # * output of cs.execute() is returned as next j. fn = lambda: j + 1 return (i + 1, cs.execute(fn)) (i_n, j_n) = control_flow_ops.while_loop(lambda i, _: i < 1000, body_implicit_capture, [0, 0], parallel_iterations=25) # For consistency between eager and graph mode. i_n = array_ops.identity(i_n) logging.warn( "\n==============\nRunning " "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock " "body_implicit_capture'\n" "==============\n") self.assertEqual((1000, 1000), self.evaluate((i_n, j_n))) logging.warn( "\n==============\nSuccessfully finished running " "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock " "body_implicit_capture'\n" "==============\n") def body_implicit_capture_protected(i, j): # This version is ok because we manually add a control # dependency on j, which is an argument to the while_loop body # and captured by fn. fn = lambda: j + 1 with ops.control_dependencies([j]): return (i + 1, cs.execute(fn)) (i_n, j_n) = control_flow_ops.while_loop(lambda i, _: i < 1000, body_implicit_capture_protected, [0, 0], parallel_iterations=25) # For consistency between eager and graph mode. i_n = array_ops.identity(i_n) logging.warn( "\n==============\nRunning " "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock " "body_implicit_capture_protected'\n" "==============\n") self.assertEqual((1000, 1000), self.evaluate((i_n, j_n))) logging.warn( "\n==============\nSuccessfully finished running " "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock " "body_implicit_capture_protected'\n" "==============\n") def body_args_capture(i, j): # This version is ok because j is an argument to fn and we can # ensure there's a control dependency on j. fn = lambda x: x + 1 return (i + 1, cs.execute(lambda: fn(j))) (i_n, j_n) = control_flow_ops.while_loop(lambda i, _: i < 1000, body_args_capture, [0, 0], parallel_iterations=25) # For consistency between eager and graph mode. i_n = array_ops.identity(i_n) logging.warn( "\n==============\nRunning " "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock " "body_args_capture'\n" "==============\n") self.assertEqual((1000, 1000), self.evaluate((i_n, j_n))) logging.warn( "\n==============\nSuccessfully finished running " "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock " "body_args_capture'\n" "==============\n")
def safe_embedding_lookup_sparse(embedding_weights, sparse_ids, sparse_weights=None, combiner=None, default_id=None, name=None, partition_strategy="div", max_norm=None): """Lookup embedding results, accounting for invalid IDs and empty features. The partitioned embedding in `embedding_weights` must all be the same shape except for the first dimension. The first dimension is allowed to vary as the vocabulary size is not necessarily a multiple of `P`. `embedding_weights` may be a `PartitionedVariable` as returned by using `tf.compat.v1.get_variable()` with a partitioner. Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs with non-positive weight. For an entry with no features, the embedding vector for `default_id` is returned, or the 0-vector if `default_id` is not supplied. The ids and weights may be multi-dimensional. Embeddings are always aggregated along the last dimension. Args: embedding_weights: A list of `P` float tensors or values representing partitioned embedding tensors. Alternatively, a `PartitionedVariable`, created by partitioning along dimension 0. The total unpartitioned shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the vocab size and `e_1, ..., e_m` are the embedding dimensions. sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the ids. `d_0` is typically batch size. sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing float weights corresponding to `sparse_ids`, or `None` if all weights are be assumed to be 1.0. combiner: A string specifying how to combine embedding results for each entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean" the default. default_id: The id to use for an entry with no features. name: A name for this operation (optional). partition_strategy: A string specifying the partitioning strategy. Currently `"div"` and `"mod"` are supported. Default is `"div"`. max_norm: If not None, all embeddings are l2-normalized to max_norm before combining. Returns: Dense tensor of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`. Raises: ValueError: if `embedding_weights` is empty. """ if combiner is None: logging.warn("The default value of combiner will change from \"mean\" " "to \"sqrtn\" after 2016/11/01.") combiner = "mean" if embedding_weights is None: raise ValueError("Missing embedding_weights %s." % embedding_weights) if isinstance(embedding_weights, variables.PartitionedVariable): embedding_weights = list(embedding_weights) # get underlying Variables. if not isinstance(embedding_weights, list): embedding_weights = [embedding_weights] if len(embedding_weights) < 1: raise ValueError("Missing embedding_weights %s." % embedding_weights) dtype = sparse_weights.dtype if sparse_weights is not None else None if isinstance(embedding_weights, variables.PartitionedVariable): embedding_weights = list(embedding_weights) embedding_weights = [ ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights ] contrib_tensor_util.assert_same_float_dtype(embedding_weights + [sparse_weights]) with ops.name_scope(name, "embedding_lookup", embedding_weights + [sparse_ids, sparse_weights]) as scope: # Reshape higher-rank sparse ids and weights to linear segment ids. original_shape = sparse_ids.dense_shape original_rank_dim = tensor_shape.Dimension( tensor_shape.dimension_value(sparse_ids.dense_shape.get_shape()[0])) original_rank = ( array_ops.size(original_shape) if original_rank_dim.value is None else original_rank_dim.value) sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [ math_ops.reduce_prod( array_ops.slice(original_shape, [0], [original_rank - 1])), array_ops.gather(original_shape, original_rank - 1) ]) if sparse_weights is not None: sparse_weights = sparse_tensor.SparseTensor(sparse_ids.indices, sparse_weights.values, sparse_ids.dense_shape) # Prune invalid ids and weights. sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights) if combiner != "sum": sparse_ids, sparse_weights = _prune_invalid_weights( sparse_ids, sparse_weights) # Fill in dummy values for empty features, if necessary. sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows( sparse_ids, default_id or 0) if sparse_weights is not None: sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0) result = embedding_ops.embedding_lookup_sparse( embedding_weights, sparse_ids, sparse_weights, combiner=combiner, partition_strategy=partition_strategy, name=None if default_id is None else scope, max_norm=max_norm) if default_id is None: # Broadcast is_row_empty to the same shape as embedding_lookup_result, # for use in Select. is_row_empty = array_ops.tile( array_ops.reshape(is_row_empty, [-1, 1]), array_ops.stack([1, array_ops.shape(result)[1]])) result = array_ops.where( is_row_empty, array_ops.zeros_like(result), result, name=scope) # Reshape back from linear ids back into higher-dimensional dense result. final_result = array_ops.reshape( result, array_ops.concat([ array_ops.slice( math_ops.cast(original_shape, dtypes.int32), [0], [original_rank - 1]), array_ops.slice(array_ops.shape(result), [1], [-1]) ], 0)) final_result.set_shape( tensor_shape.unknown_shape( (original_rank_dim - 1).value).concatenate(result.get_shape()[1:])) return final_result
def einsum(equation, *inputs, **kwargs): """A generalized contraction between tensors of arbitrary dimension. This function returns a tensor whose elements are defined by `equation`, which is written in a shorthand form inspired by the Einstein summation convention. As an example, consider multiplying two matrices A and B to form a matrix C. The elements of C are given by: ``` C[i,k] = sum_j A[i,j] * B[j,k] ``` The corresponding `equation` is: ``` ij,jk->ik ``` In general, the `equation` is obtained from the more familiar element-wise equation by 1. removing variable names, brackets, and commas, 2. replacing "*" with ",", 3. dropping summation signs, and 4. moving the output to the right, and replacing "=" with "->". Many common operations can be expressed in this way. For example: ```python # Matrix multiplication >>> einsum('ij,jk->ik', m0, m1) # output[i,k] = sum_j m0[i,j] * m1[j, k] # Dot product >>> einsum('i,i->', u, v) # output = sum_i u[i]*v[i] # Outer product >>> einsum('i,j->ij', u, v) # output[i,j] = u[i]*v[j] # Transpose >>> einsum('ij->ji', m) # output[j,i] = m[i,j] # Trace >>> einsum('ii', m) # output[j,i] = trace(m) = sum_i m[i, i] # Batch matrix multiplication >>> einsum('aij,ajk->aik', s, t) # out[a,i,k] = sum_j s[a,i,j] * t[a, j, k] ``` To enable and control broadcasting, use an ellipsis. For example, to do batch matrix multiplication, you could use: ```python >>> einsum('...ij,...jk->...ik', u, v) ``` This function behaves like `numpy.einsum`, but does not support: * Subscripts where an axis appears more than once for a single input (e.g. `ijj,k->ik`) unless it is a trace (e.g. `ijji`). Args: equation: a `str` describing the contraction, in the same format as `numpy.einsum`. *inputs: the inputs to contract (each one a `Tensor`), whose shapes should be consistent with `equation`. name: A name for the operation (optional). Returns: The contracted `Tensor`, with shape determined by `equation`. Raises: ValueError: If - the format of `equation` is incorrect, - the number of inputs implied by `equation` does not match `len(inputs)`, - an axis appears in the output subscripts but not in any of the inputs, - the number of dimensions of an input differs from the number of indices in its subscript, or - the input shapes are inconsistent along a particular axis. """ name = kwargs.pop('name', None) if kwargs: raise TypeError( 'invalid keyword arguments for this function: ' + ', '.join([format(key) for key in sorted(list(kwargs.keys()))])) with ops.name_scope(name, 'einsum', [equation, inputs]) as name: inputs = list(inputs) input_shapes = [x.get_shape() for x in inputs] input_axis_labels, output_axis_labels = _einsum_parse_and_resolve_equation( equation, input_shapes) axis_labels = set(''.join(input_axis_labels) + output_axis_labels) for a in axis_labels: for input_labels in input_axis_labels: if (len(input_axis_labels) == 1 and input_labels.count(a) == 2 and input_labels == input_labels[::-1] and '->' not in equation): return math_ops.trace(inputs[0]) if input_labels.count(a) > 1: raise ValueError( 'Subscript not supported: an axis appears more than once: %s' % input_labels) for a in axis_labels: input_count = sum(1 for s in input_axis_labels if a in s) if input_count > 2 and a not in output_axis_labels: logging.warn( 'Falling back to exponential-space implementation of einsum()' ' because index "%s" is summed over more than two inputs.', a) return _exponential_space_einsum(equation, *inputs) # Use xla_einsum if executing on TPU and if the operation is a 2 input # einsum supported by XlaEinsumOp. if _enclosing_tpu_context() is not None and len(inputs) == 2: return gen_xla_ops.xla_einsum( inputs[0], inputs[1], input_axis_labels[0] + ',' + input_axis_labels[1] + '->' + output_axis_labels) temp = inputs[0] temp_axis_labels = input_axis_labels[0] for i in xrange(len(inputs) - 1): axes_to_sum = ( set(temp_axis_labels) & set(input_axis_labels[i + 1]) - set(output_axis_labels)) temp, temp_axis_labels = _einsum_reduction( temp, temp_axis_labels, inputs[i + 1], input_axis_labels[i + 1], axes_to_sum) missing_indices = set(temp_axis_labels) - set(output_axis_labels) if missing_indices: axis = [ i for i, a in enumerate(temp_axis_labels) if a not in output_axis_labels ] temp = math_ops.reduce_sum(temp, axis=axis) temp_axis_labels = ''.join(a for a in temp_axis_labels if a in output_axis_labels) if sorted(temp_axis_labels) != sorted(output_axis_labels): raise ValueError('Invalid equation: %s' % equation) perm = [temp_axis_labels.index(a) for a in output_axis_labels] return _transpose_if_necessary(temp, perm)