def main(unused_argv=None):
    """Starts the TensorBoard HTTP server for the log directory in FLAGS.

    Args:
      unused_argv: Ignored.

    Returns:
      -1 when `FLAGS.logdir` is empty and -2 when the HTTP server cannot be
      constructed because of a `socket.error`. Otherwise the call blocks in
      `serve_forever()`.
    """
    verbose = FLAGS.debug
    if verbose:
        logging.set_verbosity(logging.DEBUG)

    logdir = FLAGS.logdir
    if not logdir:
        logging.error('A logdir must be specified. Run `tensorboard --help` for '
                      'details and examples.')
        return -1

    if verbose:
        logging.info('Starting TensorBoard in directory %s', os.getcwd())

    runs_by_path = ParseEventFilesFlag(logdir)
    multiplexer = event_multiplexer.AutoloadingMultiplexer(
        path_to_run=runs_by_path,
        interval_secs=60,
        size_guidance=TENSORBOARD_SIZE_GUIDANCE)
    multiplexer.AutoUpdate(interval=30)

    # Every request handler instance is created with the shared multiplexer.
    handler_factory = functools.partial(
        tensorboard_handler.TensorboardHandler, multiplexer)
    try:
        server = ThreadedHTTPServer((FLAGS.host, FLAGS.port), handler_factory)
    except socket.error:
        logging.error(
            'Tried to connect to port %d, but that address is in use.',
            FLAGS.port)
        return -2

    port = FLAGS.port
    status_bar.SetupStatusBarInsideGoogle('TensorBoard', port)
    print('Starting TensorBoard on port %d' % port)
    print('(You can navigate to http://localhost:%d)' % port)
    server.serve_forever()
def l1_regularizer(scale):
    """Returns a function that can be used to apply L1 regularization to weights.

    L1 regularization encourages sparsity.

    Args:
      scale: A scalar multiplier `Tensor`. 0.0 disables the regularizer.

    Returns:
      A function with signature `l1(weights, name=None)` that applies L1
      regularization. When `scale` is the Python number 0.0, the returned
      function ignores its arguments and returns `None`.

    Raises:
      ValueError: If `scale` is a Python integer, or a Python real number
        that is negative or greater than or equal to 1.0.
    """
    if isinstance(scale, numbers.Integral):
        raise ValueError("scale cannot be an integer: %s" % scale)

    # Range checks only apply to plain Python numbers; anything else (for
    # example a tensor) is passed through to the returned function unchecked.
    scale_is_real = isinstance(scale, numbers.Real)
    if scale_is_real and scale < 0.0:
        raise ValueError(
            "Setting a scale less than 0 on a regularizer: %g" % scale)
    if scale_is_real and scale >= 1.0:
        raise ValueError(
            "Setting a scale greater than 1 on a regularizer: %g" % scale)
    if scale_is_real and scale == 0.0:
        logging.info("Scale of 0 disables regularizer.")
        return lambda _, name=None: None

    def l1(weights, name=None):
        """Applies L1 regularization to weights."""
        with ops.op_scope([weights], name, "l1_regularizer") as scope:
            scale_tensor = ops.convert_to_tensor(
                scale, dtype=weights.dtype.base_dtype, name="scale")
            abs_total = standard_ops.reduce_sum(standard_ops.abs(weights))
            return standard_ops.mul(scale_tensor, abs_total, name=scope)

    return l1
def testParsingReaderOp(self):
    """Checks that the gold-parse reader takes the same steps in two graphs.

    The reader is first run on its own until the reported epoch exceeds 1,
    then again as part of a `GreedyParser` training graph. Both runs must
    take the same number of steps.
    """
    # Runs the reader over the test input for two epochs.
    num_steps_a = 0
    num_actions = 0
    num_word_ids = 0
    num_tag_ids = 0
    num_label_ids = 0
    batch_size = 10
    with self.test_session() as sess:
        (words, tags, labels), epochs, gold_actions = (
            gen_parser_ops.gold_parse_reader(self._task_context,
                                             3,
                                             batch_size,
                                             corpus_name='training-corpus'))
        while True:
            tf_gold_actions, tf_epochs, tf_words, tf_tags, tf_labels = (
                sess.run([gold_actions, epochs, words, tags, labels]))
            num_steps_a += 1
            # Track the largest id seen (+1) to size the second graph.
            num_actions = max(num_actions, max(tf_gold_actions) + 1)
            num_word_ids = max(num_word_ids, self.GetMaxId(tf_words) + 1)
            num_tag_ids = max(num_tag_ids, self.GetMaxId(tf_tags) + 1)
            num_label_ids = max(num_label_ids, self.GetMaxId(tf_labels) + 1)
            self.assertIn(tf_epochs, [0, 1, 2])
            if tf_epochs > 1:
                break

    # Runs the reader again, this time with a lot of added graph nodes.
    num_steps_b = 0
    with self.test_session() as sess:
        num_features = [6, 6, 4]
        num_feature_ids = [num_word_ids, num_tag_ids, num_label_ids]
        embedding_sizes = [8, 8, 8]
        hidden_layer_sizes = [32, 32]
        # Here we aim to test the iteration of the reader op in a complex
        # network, not the GraphBuilder.
        parser = graph_builder.GreedyParser(num_actions, num_features,
                                            num_feature_ids, embedding_sizes,
                                            hidden_layer_sizes)
        parser.AddTraining(self._task_context,
                           batch_size,
                           corpus_name='training-corpus')
        # On Python 3 dict.values() is a view, not a list; materialize it so
        # the fetches argument is a plain list on both Python 2 and 3.
        sess.run(list(parser.inits.values()))
        while True:
            tf_epochs, tf_cost, _ = sess.run([
                parser.training['epochs'], parser.training['cost'],
                parser.training['train_op']
            ])
            num_steps_b += 1
            self.assertGreaterEqual(tf_cost, 0)
            self.assertIn(tf_epochs, [0, 1, 2])
            if tf_epochs > 1:
                break

    # Assert that the two runs made the exact same number of steps.
    logging.info('Number of steps in the two runs: %d, %d',
                 num_steps_a, num_steps_b)
    self.assertEqual(num_steps_a, num_steps_b)
def l2_regularizer(scale):
    """Returns a function that can be used to apply L2 regularization to weights.

    Small values of L2 can help prevent overfitting the training data.

    Args:
      scale: A scalar multiplier `Tensor`. 0.0 disables the regularizer.

    Returns:
      A function with signature `l2(weights, name=None)` that applies L2
      regularization. When `scale` is the Python number 0.0, the returned
      function ignores its arguments and returns `None`.

    Raises:
      ValueError: If `scale` is a Python integer, or a Python real number
        that is negative or greater than or equal to 1.0.
    """
    if isinstance(scale, numbers.Integral):
        raise ValueError("scale cannot be an integer: %s" % (scale,))

    # Range checks only apply to plain Python numbers; other values are
    # handed to the returned function without validation.
    if isinstance(scale, numbers.Real):
        too_small = scale < 0.0
        if too_small:
            raise ValueError(
                "Setting a scale less than 0 on a regularizer: %g." % scale)
        too_large = scale >= 1.0
        if too_large:
            raise ValueError(
                "Setting a scale greater than 1 on a regularizer: %g." % scale)
        if scale == 0.0:
            logging.info("Scale of 0 disables regularizer.")
            return lambda _, name=None: None

    def l2(weights, name=None):
        """Applies l2 regularization to weights."""
        with ops.op_scope([weights], name, "l2_regularizer") as scope:
            scale_tensor = ops.convert_to_tensor(
                scale, dtype=weights.dtype.base_dtype, name="scale")
            penalty = nn.l2_loss(weights)
            return standard_ops.mul(scale_tensor, penalty, name=scope)

    return l2
def wait_for_session(self, master, config=None):
    """Creates a new `Session` and waits for model to be ready.

    Creates a new `Session` on 'master'.  Waits for the model to be
    initialized or recovered from a checkpoint.  It's expected that
    another thread or process will make the model ready, and that this
    is intended to be used by threads/processes that participate in a
    distributed training configuration where a different thread/process
    is responsible for initializing or recovering the model being trained.

    Args:
      master: `String` representation of the TensorFlow master to use.
      config: Optional ConfigProto proto used to configure the session.

    Returns:
      sess: A `Session`.
    """
    target = self._maybe_launch_in_process_server(master)
    while True:
        # Every attempt must connect to the resolved `target` (not the raw
        # `master` string) and keep the caller's `config`; the local init op
        # is run on each fresh session because a closed session is replaced
        # by a brand-new one.
        sess = session.Session(target, graph=self._graph, config=config)
        if self._local_init_op:
            sess.run([self._local_init_op])
        not_ready = self._model_not_ready(sess)
        if not not_ready:
            return sess
        self._safe_close(sess)
        logging.info("Waiting for model to be ready: %s", not_ready)
        time.sleep(self._recovery_wait_secs)
def Load():
    """Adds runs from every configured path, then schedules the next pass.

    Re-arms itself through a daemon `threading.Timer` that fires after
    `interval_secs`.
    """
    for directory, run_name in six.iteritems(path_to_run):
        logging.info('Checking for new runs in %s', directory)
        multiplexer.AddRunsFromDirectory(directory, run_name)

    next_pass = threading.Timer(interval_secs, Load)
    # Daemon thread: a pending timer must not keep the process alive.
    next_pass.daemon = True
    next_pass.start()
def _serve_static_file(self, path):
    """Serves the static file located at the given path.

    Responds with 404 when the path is rejected by `_path_is_safe` or when
    loading the resource raises `IOError`.

    Args:
      path: The path of the static file, relative to the tensorboard/
        directory.
    """
    # Strip off the leading forward slash.
    relative = path.lstrip('/')
    if not self._path_is_safe(relative):
        logging.info('path %s not safe, sending 404', relative)
        # Traversal attack, so 404.
        self.send_error(404)
        return

    # Paths starting with 'external' are resolved one directory up;
    # everything else is resolved under 'tensorboard'.
    base = '../' if relative.startswith('external') else 'tensorboard'
    resource_path = os.path.join(base, relative)

    # Open the file and read it.
    try:
        payload = resource_loader.load_resource(resource_path)
    except IOError:
        logging.info('path %s not found, sending 404', resource_path)
        self.send_error(404)
        return

    self.send_response(200)
    guessed_type = mimetypes.guess_type(resource_path)[0]
    self.send_header('Content-Type',
                     guessed_type or 'application/octet-stream')
    self.end_headers()
    self.wfile.write(payload)
def _serve_static_file(self, path):
    """Serves the static file located at the given path.

    A 404 is sent instead of the file when `_path_is_safe` rejects the path
    or when the resource loader raises `IOError`.

    Args:
      path: The path of the static file, relative to the tensorboard/
        directory.
    """
    # Strip off the leading forward slash.
    stripped = path.lstrip('/')
    if not self._path_is_safe(stripped):
        logging.info('path %s not safe, sending 404', stripped)
        # Traversal attack, so 404.
        self.send_error(404)
        return

    if stripped.startswith('external'):
        location = os.path.join('../', stripped)
    else:
        location = os.path.join('tensorboard', stripped)

    # Open the file and read it.
    try:
        body = resource_loader.load_resource(location)
    except IOError:
        logging.info('path %s not found, sending 404', location)
        self.send_error(404)
        return

    self.send_response(200)
    # Fall back to a generic binary type when the extension is unknown.
    content_type, _ = mimetypes.guess_type(location)
    if not content_type:
        content_type = 'application/octet-stream'
    self.send_header('Content-Type', content_type)
    self.end_headers()
    self.wfile.write(body)
def Load():
    """Checks each path in `path_to_run` for runs and re-schedules itself.

    The next invocation is scheduled `interval_secs` later on a daemon
    `threading.Timer`.
    """
    for run_path, run_label in six.iteritems(path_to_run):
        logging.info('Checking for new runs in %s', run_path)
        multiplexer.AddRunsFromDirectory(run_path, run_label)

    rescan_timer = threading.Timer(interval_secs, Load)
    # Marked as daemon so the timer thread never blocks interpreter exit.
    rescan_timer.daemon = True
    rescan_timer.start()
def ListRecursively(top):
    """Lists a directory tree as (dir_path, file_paths) tuples.

    Lists everything matching `top/**` and groups consecutive files that
    share a parent directory. Note that unlike os.Walk()/gfile.Walk(), this
    does not list subdirectories, and the file paths are whatever
    `ListDirectory` returns for the wildcard.

    Args:
      top: A path to a GCS directory.

    Returns:
      A list of (dir_path, file_paths) tuples. An empty list is returned
      when the listing raises `subprocess.CalledProcessError`.
    """
    wildcard = top + ('**' if top.endswith('/') else '/**')
    try:
        listing = ListDirectory(wildcard)
    except subprocess.CalledProcessError as err:
        logging.info('%s, assuming it means no files were found', err)
        return []

    grouped = []
    current_dir = None
    current_files = None
    for entry in listing:
        parent = os.path.dirname(entry)
        if current_files is not None and current_dir == parent:
            # Same directory as the previous file: extend the open group.
            current_files.append(entry)
        else:
            current_dir, current_files = parent, [entry]
            grouped.append((current_dir, current_files))
    return grouped
def wait_for_session(self, master, config=None):
    """Creates a new `Session` and waits for model to be ready.

    Creates a new `Session` on 'master'.  Waits for the model to be
    initialized or recovered from a checkpoint.  It's expected that
    another thread or process will make the model ready, and that this
    is intended to be used by threads/processes that participate in a
    distributed training configuration where a different thread/process
    is responsible for initializing or recovering the model being trained.

    Args:
      master: `String` representation of the TensorFlow master to use.
      config: Optional ConfigProto proto used to configure the session.

    Returns:
      sess: A `Session`.
    """
    target = self._maybe_launch_in_process_server(master)
    while True:
        sess = session.Session(target, graph=self._graph, config=config)
        # A session that was closed while waiting is replaced by a new one,
        # so the local init op has to be run on every attempt, not only on
        # the first session.
        if self._local_init_op:
            sess.run([self._local_init_op])
        not_ready = self._model_not_ready(sess)
        if not not_ready:
            return sess
        self._safe_close(sess)
        logging.info("Waiting for model to be ready: %s", not_ready)
        time.sleep(self._recovery_wait_secs)
def predicate(e):
    """Searches the error text, plus its op chain, for the expected pattern.

    The searched text is `e.message` followed by one "Caused by" line for
    `e.op` and each op reachable through `_original_op`.

    Args:
      e: The error to inspect; must expose `message` and `op`.

    Returns:
      The result of `re.search` for `expected_err_re_or_predicate`.
    """
    err_str = e.message
    current_op = e.op
    while current_op is not None:
        err_str += "\nCaused by: " + current_op.name
        current_op = current_op._original_op
    logging.info("Searching within error strings: '%s' within '%s'",
                 expected_err_re_or_predicate, err_str)
    match = re.search(expected_err_re_or_predicate, err_str)
    return match
def testParseUntilNotAlive(self):
    """Ensures that the 'alive' condition works in the Cond ops."""
    with self.test_session(graph=tf.Graph()) as sess:
        graph = self.MakeGraph(batch_size=3, beam_size=2, max_steps=5)
        training = graph.training
        sess.run(training['inits'])
        for run_index in range(5):
            logging.info('run %d', run_index)
            alive_flags = training['alive'].eval()
            # No entry may still be reported as alive.
            self.assertFalse(any(alive_flags))
def testCoNLLFormat(self):
    """Writes two CoNLL documents to the corpus file and validates them.

    The documents are joined with a blank line, spaces are replaced with
    tabs, and the result is stored UTF-8 encoded before the validation
    helpers run.
    """
    self.WriteContext('conll-sentence')
    logging.info('Writing conll file to: %s', self.corpus_file)
    corpus_text = (CONLL_DOC1 + u'\n\n' + CONLL_DOC2 + u'\n').replace(' ', '\t')
    # The payload is already encoded to bytes, so the file is opened in
    # binary mode; a text-mode file rejects bytes on Python 3.
    with open(self.corpus_file, 'wb') as f:
        f.write(corpus_text.encode('utf-8'))
    self.ValidateDocuments()
    self.BuildLexicon()
    self.ValidateTagToCategoryMap()
def predicate(e):
    """Matches the expected pattern against the error and its op ancestry.

    Builds a string from `e.message` and appends a "Caused by" line for
    `e.op` and every op found by following `_original_op`.

    Args:
      e: The error to inspect; must expose `message` and `op`.

    Returns:
      Whatever `re.search` returns for `expected_err_re_or_predicate`.
    """
    err_str = e.message
    ancestor = e.op
    while True:
        if ancestor is None:
            break
        err_str += "\nCaused by: " + ancestor.name
        ancestor = ancestor._original_op
    logging.info("Searching within error strings: '%s' within '%s'",
                 expected_err_re_or_predicate, err_str)
    return re.search(expected_err_re_or_predicate, err_str)
def wait_for_session(self, master, config=None, max_wait_secs=float("Inf")):
    """Creates a new `Session` and waits for model to be ready.

    Creates a new `Session` on 'master'.  Waits for the model to be
    initialized or recovered from a checkpoint.  It's expected that
    another thread or process will make the model ready, and that this
    is intended to be used by threads/processes that participate in a
    distributed training configuration where a different thread/process
    is responsible for initializing or recovering the model being trained.

    NB: The amount of time this method waits for the session is bounded
    by max_wait_secs. By default, this function will wait indefinitely.

    Args:
      master: `String` representation of the TensorFlow master to use.
      config: Optional ConfigProto proto used to configure the session.
      max_wait_secs: Maximum time to wait for the session to become
        available. `None` is treated as infinity.

    Returns:
      A `Session` whose model is ready.

    Raises:
      tf.DeadlineExceededError: if the session is not available after
        max_wait_secs.
    """
    target = self._maybe_launch_in_process_server(master)
    max_wait_secs = float("Inf") if max_wait_secs is None else max_wait_secs
    timer = _CountDownTimer(max_wait_secs)

    while True:
        sess = session.Session(target, graph=self._graph, config=config)
        if self._local_init_op:
            sess.run([self._local_init_op])
        not_ready = self._model_not_ready(sess)
        if not not_ready:
            return sess
        self._safe_close(sess)

        # Do we have enough time left to try again? Give up when sleeping
        # for another recovery interval would overrun the deadline.
        secs_left_after_sleep = (
            timer.secs_remaining() - self._recovery_wait_secs)
        if secs_left_after_sleep < 0:
            raise errors.DeadlineExceededError(
                None, None,
                "Session was not ready after waiting %d secs." %
                (max_wait_secs,))

        logging.info("Waiting for model to be ready: %s", not_ready)
        time.sleep(self._recovery_wait_secs)
def WriteContext(self, corpus_format):
    """Builds a `TaskSpec` for the test inputs and writes it out as text.

    Args:
      corpus_format: Format string registered for the 'documents' input.
    """
    resource_names = ('word-map', 'lcword-map', 'tag-map', 'category-map',
                      'label-map', 'prefix-table', 'suffix-table',
                      'tag-to-category')
    spec = task_spec_pb2.TaskSpec()
    self.AddInput('documents', self.corpus_file, corpus_format, spec)
    # Every other resource lives under the test temp dir with no format.
    for resource in resource_names:
        resource_path = os.path.join(FLAGS.test_tmpdir, resource)
        self.AddInput(resource, resource_path, '', spec)
    logging.info('Writing context to: %s', self.context_file)
    with open(self.context_file, 'w') as out:
        out.write(str(spec))
def _Load():
    """Loads all runs, reloads the multiplexer, and schedules the next load.

    The elapsed time is logged, and the function re-arms itself through a
    daemon `threading.Timer` that fires after `LOAD_INTERVAL`.
    """
    began = time.time()
    for run_path, run_name in six.iteritems(path_to_run):
        multiplexer.AddRunsFromDirectory(run_path, run_name)
    multiplexer.Reload()
    elapsed = time.time() - began
    logging.info('Multiplexer done loading. Load took %0.1f secs', elapsed)

    next_load = threading.Timer(LOAD_INTERVAL, _Load)
    next_load.daemon = True
    next_load.start()
def WriteContext(self, corpus_format):
    """Writes a `TaskSpec` describing the corpus and lexicon resources.

    Args:
      corpus_format: Format string registered for the 'documents' input.
    """
    task_context = task_spec_pb2.TaskSpec()
    self.AddInput('documents', self.corpus_file, corpus_format, task_context)

    tmpdir_inputs = ('word-map', 'lcword-map', 'tag-map', 'category-map',
                     'label-map', 'prefix-table', 'suffix-table',
                     'tag-to-category')
    for input_name in tmpdir_inputs:
        # These inputs are registered with an empty format string.
        self.AddInput(input_name,
                      os.path.join(FLAGS.test_tmpdir, input_name),
                      '',
                      task_context)

    logging.info('Writing context to: %s', self.context_file)
    with open(self.context_file, 'w') as context_out:
        context_out.write(str(task_context))
def _Load():
    """Refreshes the multiplexer from `path_to_run` and re-schedules itself.

    Logs how long the refresh took and starts a daemon `threading.Timer`
    that calls this function again after `LOAD_INTERVAL`.
    """
    started_at = time.time()
    for directory, label in six.iteritems(path_to_run):
        multiplexer.AddRunsFromDirectory(directory, label)
    multiplexer.Reload()
    logging.info('Multiplexer done loading. Load took %0.1f secs',
                 time.time() - started_at)

    reload_timer = threading.Timer(LOAD_INTERVAL, _Load)
    # Daemon thread so a pending reload never blocks process exit.
    reload_timer.daemon = True
    reload_timer.start()
def convert_variables_to_constants(sess, input_graph_def, output_node_names):
    """Replaces all the variables in a graph with constants of the same values.

    If you have a trained graph containing Variable ops, it can be convenient
    to convert them all to Const ops holding the same values. This makes it
    possible to describe the network fully with a single GraphDef file, and
    allows the removal of a lot of ops related to loading and saving the
    variables.

    Args:
      sess: Active TensorFlow session containing the variables.
      input_graph_def: GraphDef object holding the network.
      output_node_names: List of name strings for the result nodes of the
        graph.

    Returns:
      GraphDef containing a simplified version of the original.
    """
    # A node counts as a variable when it is the first input of an Assign op.
    assigned_names = [
        node.input[0] for node in input_graph_def.node if node.op == "Assign"
    ]
    tensor_names = [assigned + ":0" for assigned in assigned_names]
    returned_variables = sess.run(tensor_names) if tensor_names else []
    found_variables = dict(zip(assigned_names, returned_variables))
    logging.info("Frozen %d variables.", len(returned_variables))

    # This graph only includes the nodes needed to evaluate the output nodes,
    # and removes unneeded nodes like those involved in saving and assignment.
    inference_graph = extract_sub_graph(input_graph_def, output_node_names)

    output_graph_def = graph_pb2.GraphDef()
    how_many_converted = 0
    for source_node in inference_graph.node:
        replacement = graph_pb2.NodeDef()
        if source_node.name not in found_variables:
            replacement.CopyFrom(source_node)
        else:
            # Rebuild the variable as a Const holding the fetched value.
            replacement.op = "Const"
            replacement.name = source_node.name
            dtype = source_node.attr["dtype"]
            data = found_variables[source_node.name]
            replacement.attr["dtype"].CopyFrom(dtype)
            tensor_proto = tensor_util.make_tensor_proto(
                data, dtype=dtype.type, shape=data.shape)
            replacement.attr["value"].CopyFrom(
                attr_value_pb2.AttrValue(tensor=tensor_proto))
            how_many_converted += 1
        output_graph_def.node.extend([replacement])
    print("Converted %d variables to const ops." % how_many_converted)
    return output_graph_def
def CheckTokenization(self, sentence, tokenization):
    """Asserts that `sentence` is tokenized into `tokenization`.

    Writes `sentence` to the corpus file, reads it back through a
    `document_source` op, and compares the space-joined token words.

    Args:
      sentence: Text written to the corpus file.
      tokenization: Expected token words joined by single spaces.
    """
    self.WriteContext('english-text')
    logging.info('Writing text file to: %s', self.corpus_file)
    with open(self.corpus_file, 'w') as corpus:
        corpus.write(sentence)

    document_op, _ = gen_parser_ops.document_source(
        self.context_file, batch_size=1)
    with self.test_session() as sess:
        parsed = self.ReadNextDocument(sess, document_op)
        words = [token.word for token in parsed.token]
        self.assertEqual(' '.join(words), tokenization)
def convert_variables_to_constants(sess, input_graph_def, output_node_names):
    """Replaces all the variables in a graph with constants of the same values.

    If you have a trained graph containing Variable ops, it can be convenient
    to convert them all to Const ops holding the same values. This makes it
    possible to describe the network fully with a single GraphDef file, and
    allows the removal of a lot of ops related to loading and saving the
    variables.

    Args:
      sess: Active TensorFlow session containing the variables.
      input_graph_def: GraphDef object holding the network.
      output_node_names: List of name strings for the result nodes of the
        graph.

    Returns:
      GraphDef containing a simplified version of the original.
    """
    # Collect the target of every Assign op; those are the variables whose
    # current values get fetched from the session.
    variable_dict_names = []
    variable_names = []
    assign_nodes = (n for n in input_graph_def.node if n.op == "Assign")
    for assign_node in assign_nodes:
        target_name = assign_node.input[0]
        variable_dict_names.append(target_name)
        variable_names.append(target_name + ":0")

    returned_variables = []
    if variable_names:
        returned_variables = sess.run(variable_names)
    found_variables = dict(zip(variable_dict_names, returned_variables))
    logging.info("Frozen %d variables.", len(returned_variables))

    # This graph only includes the nodes needed to evaluate the output nodes,
    # and removes unneeded nodes like those involved in saving and assignment.
    inference_graph = extract_sub_graph(input_graph_def, output_node_names)

    frozen_graph = graph_pb2.GraphDef()
    how_many_converted = 0
    for original in inference_graph.node:
        converted = graph_pb2.NodeDef()
        if original.name in found_variables:
            value = found_variables[original.name]
            dtype_attr = original.attr["dtype"]
            converted.op = "Const"
            converted.name = original.name
            converted.attr["dtype"].CopyFrom(dtype_attr)
            converted.attr["value"].CopyFrom(
                attr_value_pb2.AttrValue(
                    tensor=tensor_util.make_tensor_proto(
                        value, dtype=dtype_attr.type, shape=value.shape)))
            how_many_converted += 1
        else:
            converted.CopyFrom(original)
        frozen_graph.node.extend([converted])
    print("Converted %d variables to const ops." % how_many_converted)
    return frozen_graph
def CheckTokenization(self, sentence, tokenization):
    """Checks that tokenizing `sentence` yields the words in `tokenization`.

    The sentence is written to the corpus file and read back with a
    `document_source` op; the resulting token words are joined with spaces
    and compared to `tokenization`.

    Args:
      sentence: Text written to the corpus file.
      tokenization: Expected token words joined by single spaces.
    """
    self.WriteContext('english-text')
    logging.info('Writing text file to: %s', self.corpus_file)
    with open(self.corpus_file, 'w') as text_file:
        text_file.write(sentence)

    source_op, _ = gen_parser_ops.document_source(self.context_file,
                                                  batch_size=1)
    with self.test_session() as sess:
        document = self.ReadNextDocument(sess, source_op)
        actual = ' '.join([tok.word for tok in document.token])
        self.assertEqual(actual, tokenization)
def AddRunsFromDirectory(self, path, name=None):
    """Load runs from a directory; recursively walks subdirectories.

    If path doesn't exist, no-op. This ensures that it is safe to call
    `AddRunsFromDirectory` multiple times, even before the directory is made.

    Every directory under `path` (including `path` itself) that contains at
    least one TensorFlow event file is added as a run. This means you can
    call AddRunsFromDirectory at the root of a tree of event logs and
    TensorBoard will load them all.

    If the `EventMultiplexer` is already loaded this will cause
    the newly created accumulators to `Reload()`.

    Args:
      path: A string path to a directory to load runs from.
      name: Optionally, what name to apply to the runs. If name is provided
        the run name is `name` joined with the directory's path relative to
        `path`; otherwise it is the relative path alone.

    Raises:
      ValueError: If the path exists and isn't a directory.

    Returns:
      The `EventMultiplexer`.
    """
    if gcs.IsGCSPath(path):
        subdirs = [
            subdir
            for (subdir, files) in gcs.ListRecursively(path)
            if any(event_accumulator.IsTensorFlowEventsFile(f) for f in files)
        ]
    else:
        if not gfile.Exists(path):
            # Maybe it hasn't been created yet, fail silently to retry later.
            # Still return the multiplexer so the documented return value
            # (and call chaining) holds on this path too.
            return self
        if not gfile.IsDirectory(path):
            raise ValueError('AddRunsFromDirectory: path exists and is not a '
                             'directory, %s' % path)
        subdirs = [
            subdir
            for (subdir, _, files) in gfile.Walk(path)
            if any(event_accumulator.IsTensorFlowEventsFile(f) for f in files)
        ]

    for subdir in subdirs:
        logging.info('Adding events from directory %s', subdir)
        rpath = os.path.relpath(subdir, path)
        subname = os.path.join(name, rpath) if name else rpath
        self.AddRun(subdir, name=subname)

    return self
def AddRunsFromDirectory(self, path, name=None):
    """Load runs from a directory; recursively walks subdirectories.

    If path doesn't exist, no-op. This ensures that it is safe to call
    `AddRunsFromDirectory` multiple times, even before the directory is made.

    Each directory at or below `path` that holds at least one TensorFlow
    event file becomes a run. This means you can call AddRunsFromDirectory
    at the root of a tree of event logs and TensorBoard will load them all.

    If the `EventMultiplexer` is already loaded this will cause
    the newly created accumulators to `Reload()`.

    Args:
      path: A string path to a directory to load runs from.
      name: Optionally, what name to apply to the runs. If name is provided
        the run name is `name` joined with the directory's path relative to
        `path`; otherwise it is the relative path alone.

    Raises:
      ValueError: If the path exists and isn't a directory.

    Returns:
      The `EventMultiplexer`.
    """
    is_events_file = event_accumulator.IsTensorFlowEventsFile

    if gcs.IsGCSPath(path):
        listing = gcs.ListRecursively(path)
        subdirs = [
            subdir for (subdir, files) in listing
            if any(is_events_file(f) for f in files)
        ]
    else:
        if not gfile.Exists(path):
            # Maybe it hasn't been created yet, fail silently to retry later.
            # Return the multiplexer rather than None so the documented
            # return value holds for the no-op case as well.
            return self
        if not gfile.IsDirectory(path):
            raise ValueError(
                'AddRunsFromDirectory: path exists and is not a '
                'directory, %s' % path)
        subdirs = [
            subdir for (subdir, _, files) in gfile.Walk(path)
            if any(is_events_file(f) for f in files)
        ]

    for subdir in subdirs:
        logging.info('Adding events from directory %s', subdir)
        rpath = os.path.relpath(subdir, path)
        subname = os.path.join(name, rpath) if name else rpath
        self.AddRun(subdir, name=subname)

    return self
def Load(self):
    """Loads new values from disk.

    The watcher will load from one file at a time; as soon as that file stops
    yielding events, it will move on to the next file. We assume that old
    files are never modified after a newer file has been written. As a
    result, Load() can be called multiple times in a row without losing
    events that have not been yielded yet. In other words, we guarantee that
    every event will be yielded exactly once.

    Yields:
      All values that were written to disk that have not been yielded yet.
    """
    # Create the loader on first use.
    if not self._loader:
        self._InitializeLoader()

    while True:
        # Yield all the new events in the file we're currently loading from.
        for record in self._loader.Load():
            yield record

        upcoming_path = self._GetNextPath()
        if not upcoming_path:
            logging.info('No more files in %s', self._directory)
            # Current file is empty and there are no new files, so we're done.
            return

        # There's a new file, so check to make sure there weren't any events
        # written between when we finished reading the current file and when
        # we checked for the new one. The sequence of events might look
        # something like this:
        #
        # 1. Event #1 written to file #1.
        # 2. We check for events and yield event #1 from file #1
        # 3. We check for events and see that there are no more events in
        #    file #1.
        # 4. Event #2 is written to file #1.
        # 5. Event #3 is written to file #2.
        # 6. We check for a new file and see that file #2 exists.
        #
        # Without this loop, we would miss event #2. We're also guaranteed by
        # the loader contract that no more events will be written to file #1
        # after events start being written to file #2, so we don't have to
        # worry about that.
        for record in self._loader.Load():
            yield record

        logging.info('Directory watcher for %s advancing to file %s',
                     self._directory, upcoming_path)

        # Advance to the next file and start over.
        self._SetPath(upcoming_path)
def Load(self):
    """Loads new values from disk.

    The watcher will load from one file at a time; as soon as that file stops
    yielding events, it will move on to the next file. We assume that old
    files are never modified after a newer file has been written. As a
    result, Load() can be called multiple times in a row without losing
    events that have not been yielded yet. In other words, we guarantee that
    every event will be yielded exactly once.

    Yields:
      All values that were written to disk that have not been yielded yet.
    """
    # Make sure a loader exists before reading from it.
    if not self._loader:
        self._InitializeLoader()

    more_files = True
    while more_files:
        # Yield all the new events in the file we're currently loading from.
        for item in self._loader.Load():
            yield item

        following = self._GetNextPath()
        if following:
            # There's a new file, so check to make sure there weren't any
            # events written between when we finished reading the current
            # file and when we checked for the new one. The sequence of
            # events might look something like this:
            #
            # 1. Event #1 written to file #1.
            # 2. We check for events and yield event #1 from file #1
            # 3. We check for events and see that there are no more events
            #    in file #1.
            # 4. Event #2 is written to file #1.
            # 5. Event #3 is written to file #2.
            # 6. We check for a new file and see that file #2 exists.
            #
            # Without this loop, we would miss event #2. We're also
            # guaranteed by the loader contract that no more events will be
            # written to file #1 after events start being written to file
            # #2, so we don't have to worry about that.
            for item in self._loader.Load():
                yield item

            logging.info('Directory watcher for %s advancing to file %s',
                         self._directory, following)

            # Advance to the next file and start over.
            self._SetPath(following)
        else:
            logging.info('No more files in %s', self._directory)
            # Current file is empty and there are no new files, so we're done.
            more_files = False
def wait_for_session(self, master, config=None, max_wait_secs=float("Inf")):
    """Creates a new `Session` and waits for model to be ready.

    Creates a new `Session` on 'master'.  Waits for the model to be
    initialized or recovered from a checkpoint.  It's expected that
    another thread or process will make the model ready, and that this
    is intended to be used by threads/processes that participate in a
    distributed training configuration where a different thread/process
    is responsible for initializing or recovering the model being trained.

    NB: The amount of time this method waits for the session is bounded
    by max_wait_secs. By default, this function will wait indefinitely.

    Args:
      master: `String` representation of the TensorFlow master to use.
      config: Optional ConfigProto proto used to configure the session.
      max_wait_secs: Maximum time to wait for the session to become
        available. `None` is treated as infinity.

    Returns:
      A `Session` whose model is ready.

    Raises:
      tf.DeadlineExceededError: if the session is not available after
        max_wait_secs.
    """
    target = self._maybe_launch_in_process_server(master)

    if max_wait_secs is None:
        max_wait_secs = float("Inf")
    countdown = _CountDownTimer(max_wait_secs)

    while True:
        candidate = session.Session(target, graph=self._graph, config=config)
        if self._local_init_op:
            candidate.run([self._local_init_op])
        pending = self._model_not_ready(candidate)
        if not pending:
            return candidate

        self._safe_close(candidate)

        # Do we have enough time left to try again? The value is in seconds:
        # what would remain once the next recovery sleep has elapsed.
        secs_after_wait = (
            countdown.secs_remaining() - self._recovery_wait_secs)
        if secs_after_wait < 0:
            raise errors.DeadlineExceededError(
                None, None,
                "Session was not ready after waiting %d secs." %
                (max_wait_secs,))

        logging.info("Waiting for model to be ready: %s", pending)
        time.sleep(self._recovery_wait_secs)
def testParsingReaderOp(self):
    """Checks that the gold-parse reader takes the same steps in two graphs.

    The reader is first run alone until the reported epoch exceeds 1, then
    again inside a `GreedyParser` training graph. Both runs must take the
    same number of steps.
    """
    # Runs the reader over the test input for two epochs.
    num_steps_a = 0
    num_actions = 0
    num_word_ids = 0
    num_tag_ids = 0
    num_label_ids = 0
    batch_size = 10
    with self.test_session() as sess:
        (words, tags, labels), epochs, gold_actions = gen_parser_ops.gold_parse_reader(
            self._task_context, 3, batch_size, corpus_name="training-corpus"
        )
        while True:
            tf_gold_actions, tf_epochs, tf_words, tf_tags, tf_labels = sess.run(
                [gold_actions, epochs, words, tags, labels]
            )
            num_steps_a += 1
            # Track the largest id seen (+1) to size the second graph.
            num_actions = max(num_actions, max(tf_gold_actions) + 1)
            num_word_ids = max(num_word_ids, self.GetMaxId(tf_words) + 1)
            num_tag_ids = max(num_tag_ids, self.GetMaxId(tf_tags) + 1)
            num_label_ids = max(num_label_ids, self.GetMaxId(tf_labels) + 1)
            self.assertIn(tf_epochs, [0, 1, 2])
            if tf_epochs > 1:
                break

    # Runs the reader again, this time with a lot of added graph nodes.
    num_steps_b = 0
    with self.test_session() as sess:
        num_features = [6, 6, 4]
        num_feature_ids = [num_word_ids, num_tag_ids, num_label_ids]
        embedding_sizes = [8, 8, 8]
        hidden_layer_sizes = [32, 32]
        # Here we aim to test the iteration of the reader op in a complex
        # network, not the GraphBuilder.
        parser = graph_builder.GreedyParser(
            num_actions, num_features, num_feature_ids, embedding_sizes, hidden_layer_sizes
        )
        parser.AddTraining(self._task_context, batch_size, corpus_name="training-corpus")
        # dict.values() is a view on Python 3; pass a real list of init ops
        # so the fetches argument has the same type on Python 2 and 3.
        sess.run(list(parser.inits.values()))
        while True:
            tf_epochs, tf_cost, _ = sess.run(
                [parser.training["epochs"], parser.training["cost"], parser.training["train_op"]]
            )
            num_steps_b += 1
            self.assertGreaterEqual(tf_cost, 0)
            self.assertIn(tf_epochs, [0, 1, 2])
            if tf_epochs > 1:
                break

    # Assert that the two runs made the exact same number of steps.
    logging.info("Number of steps in the two runs: %d, %d", num_steps_a, num_steps_b)
    self.assertEqual(num_steps_a, num_steps_b)
def pin_to_cpu(op):
    """Returns a CPU device for the given node.

    An op without a device type gets the result of `set_cpu0`; an op that
    already names a device type keeps its device string unchanged, with a
    log message when that type is not "CPU".
    """
    requested = op.device
    if requested is None:
        requested = ""
    device_type = pydev.from_string(requested).device_type
    if not device_type:
        return set_cpu0(requested)
    if device_type != "CPU":
        logging.info("Operation %s has been assigned to a non-CPU (%s), so "
                     "it will not be pinned to the CPU.", op.name, device_type)
    return requested
def pin_to_cpu(op):
    """Returns a CPU device for the given node.

    When the op's device string carries no device type, the result of
    `set_cpu0` is returned. Otherwise the device string is returned as is;
    a non-"CPU" device type is logged and left untouched.
    """
    device_str = "" if op.device is None else op.device
    parsed = pydev.from_string(device_str)
    has_type = bool(parsed.device_type)
    if has_type:
        if parsed.device_type == "CPU":
            return device_str
        logging.info("Operation %s has been assigned to a non-CPU (%s), so "
                     "it will not be pinned to the CPU.",
                     op.name, parsed.device_type)
        return device_str
    return set_cpu0(device_str)
def ReloadMultiplexer(multiplexer, path_to_run):
  """Registers every configured run with the multiplexer and reloads it.

  Args:
    multiplexer: The `EventMultiplexer` to add runs to and reload.
    path_to_run: A dict mapping from paths to run names, where `None` as the
      run name is interpreted as a run name equal to the path.
  """
  began = time.time()
  for run_path, run_name in six.iteritems(path_to_run):
    multiplexer.AddRunsFromDirectory(run_path, run_name)
  multiplexer.Reload()
  elapsed = time.time() - began
  logging.info('Multiplexer done loading. Load took %0.1f secs', elapsed)
def AddRunsFromDirectory(self, path, name=None):
  """Load runs from a directory, assuming each subdirectory is a run.

  If path doesn't exist, no-op. This ensures that it is safe to call
  `AddRunsFromDirectory` multiple times, even before the directory is made.

  If the directory contains TensorFlow event files, it is itself treated as a
  run.

  If the `EventMultiplexer` is already loaded or autoupdating, this will cause
  the newly created accumulators to also `Reload()` or `AutoUpdate()`.

  Args:
    path: A string path to a directory to load runs from.
    name: Optionally, what name to apply to the runs. If name is provided
      and the directory contains run subdirectories, the name of each subrun
      is the concatenation of the parent name and the subdirectory name. If
      name is provided and the directory contains event files, then a run
      is added called "name" and with the events from the path.

  Raises:
    ValueError: If the path exists and isn't a directory.

  Returns:
    The `EventMultiplexer`, on every non-raising path (including the no-op
    taken when `path` does not exist).
  """
  if not gfile.Exists(path):
    # Maybe it hasn't been created yet; skip silently so a later call can
    # retry. Return self (not None) so the documented return value holds and
    # chained calls keep working.
    return self
  if not gfile.IsDirectory(path):
    raise ValueError('Path exists and is not a directory, %s' % path)

  paths = gfile.ListDirectory(path)

  # Every subdirectory becomes its own run, prefixed with `name` if given.
  subdirectories = [
      p for p in paths if gfile.IsDirectory(os.path.join(path, p))]
  for s in subdirectories:
    subname = '/'.join([name, s]) if name else s
    self.AddRun(os.path.join(path, s), subname)

  # A directory that directly holds event files is itself a run.
  if any(event_accumulator.IsTensorFlowEventsFile(p) for p in paths):
    directory_name = os.path.split(path)[1]
    logging.info('Directory %s has event files; loading', directory_name)
    self.AddRun(path, name if name else directory_name)
  return self
def AddRunsFromDirectory(self, path, name=None):
  """Load runs from a directory, assuming each subdirectory is a run.

  If path doesn't exist, no-op. This ensures that it is safe to call
  `AddRunsFromDirectory` multiple times, even before the directory is made.

  If the directory contains TensorFlow event files, it is itself treated as a
  run.

  If the `EventMultiplexer` is already loaded or autoupdating, this will cause
  the newly created accumulators to also `Reload()` or `AutoUpdate()`.

  Args:
    path: A string path to a directory to load runs from.
    name: Optionally, what name to apply to the runs. If name is provided
      and the directory contains run subdirectories, the name of each subrun
      is the concatenation of the parent name and the subdirectory name. If
      name is provided and the directory contains event files, then a run
      is added called "name" and with the events from the path.

  Raises:
    ValueError: If the path exists and isn't a directory.

  Returns:
    The `EventMultiplexer`, on every non-raising path (including the no-op
    taken when `path` does not exist).
  """
  if not gfile.Exists(path):
    # Maybe it hasn't been created yet; skip silently so a later call can
    # retry. Return self (not None) so the documented return value holds and
    # chained calls keep working.
    return self
  if not gfile.IsDirectory(path):
    raise ValueError('Path exists and is not a directory, %s' % path)

  paths = gfile.ListDirectory(path)

  # Every subdirectory becomes its own run, prefixed with `name` if given.
  subdirectories = [
      p for p in paths if gfile.IsDirectory(os.path.join(path, p))]
  for s in subdirectories:
    subname = '/'.join([name, s]) if name else s
    self.AddRun(os.path.join(path, s), subname)

  # A directory that directly holds event files is itself a run.
  if any(event_accumulator.IsTensorFlowEventsFile(p) for p in paths):
    directory_name = os.path.split(path)[1]
    logging.info('Directory %s has event files; loading', directory_name)
    self.AddRun(path, name if name else directory_name)
  return self
def _call_func(self, args, kwargs, check_for_new_variables):
  """Calls the wrapped function and reports variables it creates.

  Args:
    args: Positional arguments forwarded to `self._func`.
    kwargs: Keyword arguments forwarded to `self._func`.
    check_for_new_variables: If true, compare the VARIABLES and
      TRAINABLE_VARIABLES collections before and after the call.

  Returns:
    Whatever `self._func(*args, **kwargs)` returns.

  Raises:
    ValueError: If `check_for_new_variables` is set and the call added
      entries to the TRAINABLE_VARIABLES collection.
    Exception: Any exception raised inside the call is re-raised with the
      stack trace of the original definition appended to its first argument.
  """
  try:
    vars_at_start = len(ops.get_collection(ops.GraphKeys.VARIABLES))
    trainable_at_start = len(
        ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))

    result = self._func(*args, **kwargs)
    if check_for_new_variables:
      trainable_variables = ops.get_collection(
          ops.GraphKeys.TRAINABLE_VARIABLES)
      # If a variable that we intend to train is created as a side effect
      # of creating a template, then that is almost certainly an error.
      if trainable_at_start != len(trainable_variables):
        raise ValueError(
            "Trainable variable created when calling a template "
            "after the first time, perhaps you used tf.Variable "
            "when you meant tf.get_variable: %s" %
            (trainable_variables[trainable_at_start:],))

      # Non-trainable tracking variables are a legitimate reason why a new
      # variable would be created, but it is a relatively advanced use-case,
      # so log it.
      variables = ops.get_collection(ops.GraphKeys.VARIABLES)
      if vars_at_start != len(variables):
        logging.info(
            "New variables created when calling a template after "
            "the first time, perhaps you used tf.Variable when you "
            "meant tf.get_variable: %s", variables[vars_at_start:])
    return result
  except Exception as exc:  # `as` form parses on Python 2.6+ and Python 3.
    # Reraise the exception, but append the original definition to the
    # trace. A separate local is used so the `args` parameter is not shadowed.
    exc_args = exc.args
    arg0 = exc_args[0] if exc_args else ""
    trace = "".join(
        _skip_common_stack_elements(self._stacktrace,
                                    traceback.format_stack()))
    arg0 = "%s\n\noriginally defined at:\n%s" % (arg0, trace)
    new_args = [arg0]
    new_args.extend(exc_args[1:])
    exc.args = tuple(new_args)
    raise
def _AddParam(self, shape, dtype, name, initializer=None,
              return_average=False):
  """Add a model parameter w.r.t. we expect to compute gradients.

  _AddParam creates both regular parameters (usually for training) and
  averaged nodes (usually for inference). It returns one or the other based
  on the 'return_average' arg.

  The variable, its averaging ops and their initializers are only built the
  first time a given `name` is requested; later calls look the entries up in
  `self.params` / `self.variables`.

  Args:
    shape: int list, tensor shape of the parameter to create
    dtype: tf.DataType, data type of the parameter
    name: string, name of the parameter in the TF graph
    initializer: optional initializer for the parameter
    return_average: if False, return parameter otherwise return moving average

  Returns:
    parameter or averaged parameter
  """
  if name not in self.params:
    step = tf.cast(self.GetStep(), tf.float32)
    # Put all parameters and their initializing ops in their own scope
    # irrespective of the current scope (training or eval).
    with tf.name_scope(self._param_scope):
      self.params[name] = tf.get_variable(name, shape, dtype, initializer)
      param = self.params[name]
      # An explicit init op is registered only when an initializer was given.
      if initializer is not None:
        self.inits[name] = state_ops.init_variable(param, initializer)
      # A decay of exactly 1 selects a step-dependent decay of
      # step / (step + 1) instead of the configured constant.
      if self._averaging_decay == 1:
        logging.info('Using vanilla averaging of parameters.')
        ema = tf.train.ExponentialMovingAverage(decay=(step / (step + 1.0)),
                                                num_updates=None)
      else:
        ema = tf.train.ExponentialMovingAverage(decay=self._averaging_decay,
                                                num_updates=step)
      # Record the update op, the averaged value and an init op for it under
      # keys derived from `name`.
      self._averaging[name + '_avg_update'] = ema.apply([param])
      self.variables[name + '_avg_var'] = ema.average(param)
      self.inits[name + '_avg_init'] = state_ops.init_variable(
          ema.average(param), tf.zeros_initializer)
  return (self.variables[name + '_avg_var'] if return_average else
          self.params[name])
def generate_checkpoint_state_proto(save_dir, model_checkpoint_path,
                                    all_model_checkpoint_paths=None):
  """Generates a checkpoint state proto.

  The list passed as `all_model_checkpoint_paths` is never modified; all
  adjustments are made on a private copy.

  Args:
    save_dir: Directory where the model was saved.
    model_checkpoint_path: The checkpoint file.
    all_model_checkpoint_paths: List of strings. Paths to all not-yet-deleted
      checkpoints, sorted from oldest to newest. If this is a non-empty list,
      the last element must be equal to model_checkpoint_path. These paths
      are also saved in the CheckpointState proto.

  Returns:
    CheckpointState proto with model_checkpoint_path and
    all_model_checkpoint_paths updated to either absolute paths or
    relative paths to the current save_dir.
  """
  # Copy so that the append / rewrite below cannot leak into the caller's list.
  all_model_checkpoint_paths = list(all_model_checkpoint_paths or [])

  if (not all_model_checkpoint_paths or
      all_model_checkpoint_paths[-1] != model_checkpoint_path):
    logging.info(
        "%s is not in all_model_checkpoint_paths. Manually adding it.",
        model_checkpoint_path)
    all_model_checkpoint_paths.append(model_checkpoint_path)

  # Relative paths need to be rewritten to be relative to the "save_dir"
  # if model_checkpoint_path already contains "save_dir".
  if not os.path.isabs(save_dir):
    if not os.path.isabs(model_checkpoint_path):
      model_checkpoint_path = os.path.relpath(model_checkpoint_path, save_dir)
    all_model_checkpoint_paths = [
        p if os.path.isabs(p) else os.path.relpath(p, save_dir)
        for p in all_model_checkpoint_paths]

  coord_checkpoint_proto = CheckpointState(
      model_checkpoint_path=model_checkpoint_path,
      all_model_checkpoint_paths=all_model_checkpoint_paths)

  return coord_checkpoint_proto
def main(unused_argv=None):
  """Starts the TensorBoard HTTP server and its periodic run loader.

  Returns:
    -1 if no logdir flag was given, -2 if the server socket could not be
    created; otherwise serves requests forever.
  """
  if FLAGS.debug:
    logging.set_verbosity(logging.DEBUG)
    logging.info('TensorBoard is in debug mode.')

  if not FLAGS.logdir:
    logging.error('A logdir must be specified. Run `tensorboard --help` for '
                  'details and examples.')
    return -1

  logging.info('Starting TensorBoard in directory %s', os.getcwd())
  path_to_run = ParseEventFilesFlag(FLAGS.logdir)
  logging.info('TensorBoard path_to_run is: %s', path_to_run)

  multiplexer = event_multiplexer.EventMultiplexer(
      size_guidance=TENSORBOARD_SIZE_GUIDANCE)

  def _ScheduleLoad(delay_secs):
    # Runs _Load on a daemon timer thread after `delay_secs` seconds.
    timer = threading.Timer(delay_secs, _Load)
    timer.daemon = True
    timer.start()

  def _Load():
    # Adds all runs, reloads the multiplexer, then schedules the next load.
    began = time.time()
    for (path, name) in six.iteritems(path_to_run):
      multiplexer.AddRunsFromDirectory(path, name)
    multiplexer.Reload()
    elapsed = time.time() - began
    logging.info('Multiplexer done loading. Load took %0.1f secs', elapsed)
    _ScheduleLoad(LOAD_INTERVAL)

  # Kick off the first load immediately, off the main thread.
  _ScheduleLoad(0)

  handler_factory = functools.partial(tensorboard_handler.TensorboardHandler,
                                      multiplexer)
  try:
    server = ThreadedHTTPServer((FLAGS.host, FLAGS.port), handler_factory)
  except socket.error:
    logging.error('Tried to connect to port %d, but that address is in use.',
                  FLAGS.port)
    return -2

  try:
    tag = resource_loader.load_resource('tensorboard/TAG').strip()
    logging.info('TensorBoard is tag: %s', tag)
  except IOError:
    logging.warning('Unable to read TensorBoard tag')
    tag = ''

  status_bar.SetupStatusBarInsideGoogle('TensorBoard %s' % tag, FLAGS.port)
  print('Starting TensorBoard %s on port %d' % (tag, FLAGS.port))
  print('(You can navigate to http://%s:%d)' % (FLAGS.host, FLAGS.port))
  server.serve_forever()
def generate_checkpoint_state_proto(save_dir, model_checkpoint_path,
                                    all_model_checkpoint_paths=None):
  """Generates a checkpoint state proto.

  The list passed as `all_model_checkpoint_paths` is never modified; all
  adjustments are made on a private copy.

  Args:
    save_dir: Directory where the model was saved.
    model_checkpoint_path: The checkpoint file.
    all_model_checkpoint_paths: List of strings. Paths to all not-yet-deleted
      checkpoints, sorted from oldest to newest. If this is a non-empty list,
      the last element must be equal to model_checkpoint_path. These paths
      are also saved in the CheckpointState proto.

  Returns:
    CheckpointState proto with model_checkpoint_path and
    all_model_checkpoint_paths updated to either absolute paths or
    relative paths to the current save_dir.
  """
  # Copy so that the append / rewrite below cannot leak into the caller's list.
  all_model_checkpoint_paths = list(all_model_checkpoint_paths or [])

  if (not all_model_checkpoint_paths or
      all_model_checkpoint_paths[-1] != model_checkpoint_path):
    logging.info(
        "%s is not in all_model_checkpoint_paths. Manually adding it.",
        model_checkpoint_path)
    all_model_checkpoint_paths.append(model_checkpoint_path)

  # Relative paths need to be rewritten to be relative to the "save_dir"
  # if model_checkpoint_path already contains "save_dir".
  if not os.path.isabs(save_dir):
    if not os.path.isabs(model_checkpoint_path):
      model_checkpoint_path = os.path.relpath(model_checkpoint_path, save_dir)
    all_model_checkpoint_paths = [
        p if os.path.isabs(p) else os.path.relpath(p, save_dir)
        for p in all_model_checkpoint_paths]

  coord_checkpoint_proto = CheckpointState(
      model_checkpoint_path=model_checkpoint_path,
      all_model_checkpoint_paths=all_model_checkpoint_paths)

  return coord_checkpoint_proto
def request_stop(self, ex=None):
  """Request that the threads stop.

  After this is called, calls to `should_stop()` will return `True`.

  Note: If an exception is being passed in, it must be in the context of
  handling the exception (i.e. `try: ... except Exception as ex: ...`) and not
  a newly created one.

  Args:
    ex: Optional `Exception`, or Python `exc_info` tuple as returned by
      `sys.exc_info()`. If this is the first call to `request_stop()` the
      corresponding exception is recorded and re-raised from `join()`.
  """
  ex = self._filter_exception(ex)
  with self._lock:
    if not self._stop_event.is_set():
      if ex and self._exc_info_to_raise is None:
        if isinstance(ex, tuple):
          logging.info("Error reported to Coordinator: %s",
                       compat.as_str_any(ex[1]))
          self._exc_info_to_raise = ex
        else:
          logging.info("Error reported to Coordinator: %s",
                       compat.as_str_any(ex))
          self._exc_info_to_raise = sys.exc_info()
        # self._exc_info_to_raise should contain a tuple containing exception
        # (type, value, traceback)
        if (len(self._exc_info_to_raise) != 3 or
            not self._exc_info_to_raise[0] or
            not self._exc_info_to_raise[1]):
          # Raise, catch and record the exception here so that error happens
          # where expected.
          try:
            # The value is itself a tuple, so it is wrapped in a 1-tuple:
            # formatting it directly against a single %s raises TypeError
            # for any length other than one, which the handler below would
            # not catch.
            raise ValueError(
                "ex must be a tuple or sys.exc_info must return the current "
                "exception: %s" % (self._exc_info_to_raise,))
          except ValueError:
            # Record this error so it kills the coordinator properly.
            self._exc_info_to_raise = sys.exc_info()
      self._stop_event.set()
def _call_func(self, args, kwargs, check_for_new_variables):
  """Calls the wrapped function and reports variables it creates.

  Args:
    args: Positional arguments forwarded to `self._func`.
    kwargs: Keyword arguments forwarded to `self._func`.
    check_for_new_variables: If true, compare the VARIABLES and
      TRAINABLE_VARIABLES collections before and after the call.

  Returns:
    Whatever `self._func(*args, **kwargs)` returns.

  Raises:
    ValueError: If `check_for_new_variables` is set and the call added
      entries to the TRAINABLE_VARIABLES collection.
    Exception: Any exception raised inside the call is re-raised with the
      stack trace of the original definition appended to its first argument.
  """
  try:
    vars_at_start = len(ops.get_collection(ops.GraphKeys.VARIABLES))
    trainable_at_start = len(
        ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))

    result = self._func(*args, **kwargs)
    if check_for_new_variables:
      trainable_variables = ops.get_collection(
          ops.GraphKeys.TRAINABLE_VARIABLES)
      # If a variable that we intend to train is created as a side effect
      # of creating a template, then that is almost certainly an error.
      if trainable_at_start != len(trainable_variables):
        raise ValueError("Trainable variable created when calling a template "
                         "after the first time, perhaps you used tf.Variable "
                         "when you meant tf.get_variable: %s" %
                         (trainable_variables[trainable_at_start:],))

      # Non-trainable tracking variables are a legitimate reason why a new
      # variable would be created, but it is a relatively advanced use-case,
      # so log it.
      variables = ops.get_collection(ops.GraphKeys.VARIABLES)
      if vars_at_start != len(variables):
        logging.info("New variables created when calling a template after "
                     "the first time, perhaps you used tf.Variable when you "
                     "meant tf.get_variable: %s",
                     variables[vars_at_start:])
    return result
  except Exception as exc:  # `as` form parses on Python 2.6+ and Python 3.
    # Reraise the exception, but append the original definition to the
    # trace. A separate local is used so the `args` parameter is not shadowed.
    exc_args = exc.args
    arg0 = exc_args[0] if exc_args else ""
    trace = "".join(_skip_common_stack_elements(self._stacktrace,
                                                traceback.format_stack()))
    arg0 = "%s\n\noriginally defined at:\n%s" % (arg0, trace)
    new_args = [arg0]
    new_args.extend(exc_args[1:])
    exc.args = tuple(new_args)
    raise
def main(unused_argv=None):
  """Starts the TensorBoard HTTP server and its periodic run loader.

  Returns:
    -1 if no logdir flag was given, -2 if the server socket could not be
    created; otherwise serves requests forever.
  """
  if FLAGS.debug:
    logging.set_verbosity(logging.DEBUG)
    logging.info('TensorBoard is in debug mode.')

  if not FLAGS.logdir:
    logging.error('A logdir must be specified. Run `tensorboard --help` for '
                  'details and examples.')
    return -1

  logging.info('Starting TensorBoard in directory %s', os.getcwd())
  path_to_run = ParseEventFilesFlag(FLAGS.logdir)
  logging.info('TensorBoard path_to_run is: %s', path_to_run)

  multiplexer = event_multiplexer.EventMultiplexer(
      size_guidance=TENSORBOARD_SIZE_GUIDANCE)
  # Reload once before any run is added so the multiplexer starts out in a
  # loaded state and can handle HTTP requests while runs are still loading.
  multiplexer.Reload()

  def _ScheduleLoad(delay_secs):
    # Runs _Load on a daemon timer thread after `delay_secs` seconds.
    timer = threading.Timer(delay_secs, _Load)
    timer.daemon = True
    timer.start()

  def _Load():
    # Adds all runs, reloads the multiplexer, then schedules the next load.
    began = time.time()
    for (path, name) in six.iteritems(path_to_run):
      multiplexer.AddRunsFromDirectory(path, name)
    multiplexer.Reload()
    elapsed = time.time() - began
    logging.info('Multiplexer done loading. Load took %0.1f secs', elapsed)
    _ScheduleLoad(LOAD_INTERVAL)

  # Kick off the first load immediately, off the main thread.
  _ScheduleLoad(0)

  handler_factory = functools.partial(tensorboard_handler.TensorboardHandler,
                                      multiplexer)
  try:
    server = ThreadedHTTPServer((FLAGS.host, FLAGS.port), handler_factory)
  except socket.error:
    logging.error('Tried to connect to port %d, but that address is in use.',
                  FLAGS.port)
    return -2

  try:
    tag = resource_loader.load_resource('tensorboard/TAG').strip()
    logging.info('TensorBoard is tag: %s', tag)
  except IOError:
    logging.warning('Unable to read TensorBoard tag')
    tag = ''

  status_bar.SetupStatusBarInsideGoogle('TensorBoard %s' % tag, FLAGS.port)
  print('Starting TensorBoard %s on port %d' % (tag, FLAGS.port))
  print('(You can navigate to http://%s:%d)' % (FLAGS.host, FLAGS.port))
  server.serve_forever()
def request_stop(self, ex=None):
  """Request that the threads stop.

  After this is called, calls to `should_stop()` will return `True`.

  Note: If an exception is being passed in, it must be in the context of
  handling the exception (i.e. `try: ... except Exception as ex: ...`) and not
  a newly created one.

  Args:
    ex: Optional `Exception`, or Python `exc_info` tuple as returned by
      `sys.exc_info()`. If this is the first call to `request_stop()` the
      corresponding exception is recorded and re-raised from `join()`.
  """
  ex = self._filter_exception(ex)
  with self._lock:
    if not self._stop_event.is_set():
      if ex and self._exc_info_to_raise is None:
        if isinstance(ex, tuple):
          logging.info("Error reported to Coordinator: %s",
                       compat.as_str_any(ex[1]))
          self._exc_info_to_raise = ex
        else:
          logging.info("Error reported to Coordinator: %s",
                       compat.as_str_any(ex))
          self._exc_info_to_raise = sys.exc_info()
        # self._exc_info_to_raise should contain a tuple containing exception
        # (type, value, traceback)
        if (len(self._exc_info_to_raise) != 3 or
            not self._exc_info_to_raise[0] or
            not self._exc_info_to_raise[1]):
          # Raise, catch and record the exception here so that error happens
          # where expected.
          try:
            # The value is itself a tuple, so it is wrapped in a 1-tuple:
            # formatting it directly against a single %s raises TypeError
            # for any length other than one, which the handler below would
            # not catch.
            raise ValueError(
                "ex must be a tuple or sys.exc_info must return the current "
                "exception: %s" % (self._exc_info_to_raise,))
          except ValueError:
            # Record this error so it kills the coordinator properly.
            self._exc_info_to_raise = sys.exc_info()
      self._stop_event.set()
def MakeGraph(self, max_steps=10, beam_size=2, batch_size=1, **kwargs):
  """Builds a structured graph builder with training and evaluation added.

  Feature, domain and action sizes are queried from the task context in a
  throwaway graph; embedding dimensions are fixed at 8 per feature group.
  Extra keyword arguments are forwarded to `StructuredGraphBuilder`.

  Returns:
    The builder, with a grouped 'inits' op stored in `builder.training`.
  """
  assert max_steps > 0, 'Empty network not supported.'

  logging.info('MakeGraph + %s', kwargs)

  # Only the sizes are needed from this session; the embedding dims it
  # reports are discarded in favour of the fixed values below.
  with self.test_session(graph=tf.Graph()) as sess:
    size_op = gen_parser_ops.feature_size(task_context=self._task_context)
    feature_sizes, domain_sizes, _, num_actions = sess.run(size_op)

  builder = structured_graph_builder.StructuredGraphBuilder(
      num_actions,
      feature_sizes,
      domain_sizes,
      [8, 8, 8],  # embedding_dims
      [],         # hidden_layer_sizes
      seed=1,
      max_steps=max_steps,
      beam_size=beam_size,
      gate_gradients=True,
      use_locking=True,
      use_averaging=False,
      check_parameters=False,
      **kwargs)
  builder.AddTraining(self._task_context,
                      batch_size,
                      learning_rate=0.01,
                      decay_steps=1000,
                      momentum=0.9,
                      corpus_name='training-corpus')
  builder.AddEvaluation(self._task_context,
                        batch_size,
                        evaluation_max_steps=25,
                        corpus_name=None)
  init_ops = builder.inits.values()
  builder.training['inits'] = tf.group(*init_ops, name='inits')
  return builder
def AddRun(self, path, name=None):
  """Add a run to the multiplexer.

  If the name is not specified, it is the same as the path.

  If a run by that name exists, and we are already watching the right path,
  do nothing. If we are watching a different path, replace the event
  accumulator.

  If `AutoUpdate` or `Reload` have been called, it will `AutoUpdate` or
  `Reload` the newly created accumulators. This maintains the invariant that
  once the Multiplexer was activated, all of its accumulators are active.

  Args:
    path: Path to the event files (or event directory) for given run.
    name: Name of the run to add. If not provided (None or the empty
      string), is set to path.

  Returns:
    The `EventMultiplexer`.
  """
  # Compare by value: `is ''` tests object identity and only works by
  # accident of string interning.
  if name is None or name == '':
    name = path
  accumulator = None
  with self._accumulators_mutex:
    if name not in self._accumulators or self._paths[name] != path:
      if name in self._paths and self._paths[name] != path:
        # TODO(danmane) - Make it impossible to overwrite an old path with
        # a new path (just give the new path a distinct name)
        logging.warning('Conflict for name %s: old path %s, new path %s',
                        name, self._paths[name], path)
      logging.info('Constructing EventAccumulator for %s', path)
      accumulator = event_accumulator.EventAccumulator(path,
                                                       self._size_guidance)
      self._accumulators[name] = accumulator
      self._paths[name] = path
  # Activate a newly created accumulator outside the mutex.
  if accumulator:
    if self._reload_called:
      accumulator.Reload()
    if self._autoupdate_called:
      accumulator.AutoUpdate(self._autoupdate_interval)
  return self
def main(unused_argv):
  """Reads documents from the configured corpus and prints each parse tree.

  Batches of serialized sentences are fetched until the document source
  reports it is finished. For every sentence the input text and an ASCII
  rendering of its tree are written to stdout.
  """
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    src = gen_parser_ops.document_source(batch_size=32,
                                         corpus_name=FLAGS.corpus_name,
                                         task_context=FLAGS.task_context)
    sentence = sentence_pb2.Sentence()
    while True:
      documents, finished = sess.run(src)
      logging.info('Read %d documents', len(documents))
      for serialized in documents:
        sentence.ParseFromString(serialized)
        tr = asciitree.LeftAligned()
        tree = to_dict(sentence)
        # Single-argument print calls behave the same on Python 2 and 3.
        print('Input: %s' % sentence.text)
        print('Parse:')
        print(tr(tree))

      if finished:
        break
def main(unused_argv):
  """Reads documents from the configured corpus and prints each parse tree.

  Batches of serialized sentences are fetched until the document source
  reports it is finished. For every sentence the input text and an ASCII
  rendering of its tree are written to stdout.
  """
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    src = gen_parser_ops.document_source(batch_size=32,
                                         corpus_name=FLAGS.corpus_name,
                                         task_context=FLAGS.task_context)
    sentence = sentence_pb2.Sentence()
    while True:
      documents, finished = sess.run(src)
      logging.info('Read %d documents', len(documents))
      for serialized in documents:
        sentence.ParseFromString(serialized)
        tr = asciitree.LeftAligned()
        tree = to_dict(sentence)
        # Single-argument print calls behave the same on Python 2 and 3.
        print('Input: %s' % sentence.text)
        print('Parse:')
        print(tr(tree))

      if finished:
        break
def request_stop(self, ex=None):
  """Asks all threads to stop, optionally recording the causing error.

  After this is called, calls to should_stop() will return True.

  Args:
    ex: Optional Exception, or Python 'exc_info' tuple as returned by
      sys.exc_info(). If this is the first call to request_stop() the
      corresponding exception is recorded and re-raised from join().
  """
  with self._lock:
    # Nothing to do once a stop has already been requested.
    if self._stop_event.is_set():
      return

    if ex and self._exc_info_to_raise is None:
      if isinstance(ex, tuple):
        # An exc_info tuple is stored as given; its value part is logged.
        reported, recorded = ex[1], ex
      else:
        # A bare exception: capture the exc_info currently being handled.
        reported, recorded = ex, sys.exc_info()
      logging.info("Error reported to Coordinator: %s", str(reported))
      self._exc_info_to_raise = recorded

    self._stop_event.set()
def _get_first_op_from_collection(self, key):
  """Looks up a collection and returns its first entry, if any.

  Args:
    key: A string collection key.

  Returns:
    The first Op found in a collection, or `None` if the collection is empty
    or the lookup raises `LookupError`.
  """
  try:
    found = ops.get_collection(key)
    count = len(found)
    if count > 1:
      logging.info("Found %d %s operations. Returning the first one.",
                   count, key)
    return found[0] if found else None
  except LookupError:
    return None
def _get_first_op_from_collection(self, key):
  """Looks up a collection and returns its first entry, if any.

  Args:
    key: A string collection key.

  Returns:
    The first Op found in a collection, or `None` if the collection is empty
    or the lookup raises `LookupError`.
  """
  try:
    found = ops.get_collection(key)
    count = len(found)
    if count > 1:
      logging.info("Found %d %s operations. Returning the first one.",
                   count, key)
    return found[0] if found else None
  except LookupError:
    return None
def request_stop(self, ex=None):
  """Asks all threads to stop, optionally recording the causing error.

  After this is called, calls to should_stop() will return True.

  Args:
    ex: Optional Exception, or Python 'exc_info' tuple as returned by
      sys.exc_info(). If this is the first call to request_stop() the
      corresponding exception is recorded and re-raised from join().
  """
  with self._lock:
    # Nothing to do once a stop has already been requested.
    if self._stop_event.is_set():
      return

    if ex and self._exc_info_to_raise is None:
      if isinstance(ex, tuple):
        # An exc_info tuple is stored as given; its value part is logged.
        reported, recorded = ex[1], ex
      else:
        # A bare exception: capture the exc_info currently being handled.
        reported, recorded = ex, sys.exc_info()
      logging.info("Error reported to Coordinator: %s", str(reported))
      self._exc_info_to_raise = recorded

    self._stop_event.set()
def l1_regularizer(scale):
  """Returns a function that can be used to apply L1 regularization to weights.

  L1 regularization encourages sparsity.

  Args:
    scale: A scalar multiplier `Tensor`. 0.0 disables the regularizer.

  Returns:
    A function with signature `l1(weights, name=None)` that apply L1
    regularization. For a scale of 0.0 the returned function always yields
    `None`.

  Raises:
    ValueError: If scale is outside of the range [0.0, 1.0] or if scale is not a
    float.
  """
  if isinstance(scale, numbers.Integral):
    raise ValueError('scale cannot be an integer: %s' % scale)
  # Range checks only apply to plain Python numbers; anything else is passed
  # through to convert_to_tensor when the regularizer is applied.
  if isinstance(scale, numbers.Real):
    if scale < 0.:
      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
                       scale)
    # Strictly greater: 1.0 is the inclusive upper bound of the documented
    # range and is not "greater than 1".
    if scale > 1.:
      raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
                       scale)
    if scale == 0.:
      logging.info('Scale of 0 disables regularizer.')
      return lambda _, name=None: None

  def l1(weights, name=None):
    """Applies L1 regularization to weights."""
    with ops.op_scope([weights], name, 'l1_regularizer') as scope:
      my_scale = ops.convert_to_tensor(scale,
                                       dtype=weights.dtype.base_dtype,
                                       name='scale')
      return standard_ops.mul(
          my_scale,
          standard_ops.reduce_sum(standard_ops.abs(weights)),
          name=scope)

  return l1
def testParsingReaderOpWhileLoop(self):
  """Runs the gold parse reader inside a While loop until epoch 2."""
  feature_size = 3
  batch_size = 5

  def ParserEndpoints():
    # Builds a fresh reader op over the training corpus.
    return gen_parser_ops.gold_parse_reader(self._task_context,
                                            feature_size,
                                            batch_size,
                                            corpus_name='training-corpus')

  with self.test_session() as sess:

    # The 'condition' and 'body' functions expect as many arguments as there
    # are loop variables. 'condition' depends on the 'epoch' loop variable
    # only, so we disregard the remaining unused function arguments. 'body'
    # returns a list of updated loop variables.
    def Condition(epoch, *unused_args):
      return tf.less(epoch, 2)

    def Body(epoch, num_actions, *feature_args):
      # By adding one of the outputs of the reader op ('epoch') as a control
      # dependency to the reader op we force the repeated evaluation of the
      # reader op.
      with epoch.graph.control_dependencies([epoch]):
        features, epoch, gold_actions = ParserEndpoints()
      batch_max_action = tf.reduce_max(gold_actions, [0], False) + 1
      num_actions = tf.maximum(num_actions, batch_max_action)
      # One feature tensor per extra loop variable, in order.
      feature_ids = [features[i] for i in range(len(feature_args))]
      return [epoch, num_actions] + feature_ids

    initial_epoch = ParserEndpoints()[-2]
    initial_num_actions = tf.constant(0)
    loop = cf.While(Condition, Body, [initial_epoch, initial_num_actions],
                    parallel_iterations=1)
    outcome = sess.run(loop)
    logging.info('Result: %s', outcome)
    self.assertEqual(outcome[0], 2)