Example #1
def main(unused_argv=None):
  if FLAGS.debug:
    logging.set_verbosity(logging.DEBUG)

  if not FLAGS.logdir:
    logging.error('A logdir must be specified. Run `tensorboard --help` for '
                  'details and examples.')
    return -1

  if FLAGS.debug:
    logging.info('Starting TensorBoard in directory %s', os.getcwd())

  path_to_run = ParseEventFilesFlag(FLAGS.logdir)
  multiplexer = event_multiplexer.AutoloadingMultiplexer(
      path_to_run=path_to_run, interval_secs=60,
      size_guidance=TENSORBOARD_SIZE_GUIDANCE)

  multiplexer.AutoUpdate(interval=30)

  factory = functools.partial(tensorboard_handler.TensorboardHandler,
                              multiplexer)
  try:
    server = ThreadedHTTPServer((FLAGS.host, FLAGS.port), factory)
  except socket.error:
    logging.error('Tried to connect to port %d, but that address is in use.',
                  FLAGS.port)
    return -2

  status_bar.SetupStatusBarInsideGoogle('TensorBoard', FLAGS.port)
  print('Starting TensorBoard on port %d' % FLAGS.port)
  print('(You can navigate to http://localhost:%d)' % FLAGS.port)
  server.serve_forever()
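The functools.partial call above turns the handler class into a factory with the multiplexer pre-bound, so ThreadedHTTPServer can keep constructing handlers with its usual (request, client_address, server) arguments. A standalone sketch of that pattern; DemoHandler and FakeMultiplexer are illustrative stand-ins, not TensorBoard classes:

import functools


class FakeMultiplexer(object):
  def Runs(self):
    return ['run1', 'run2']


class DemoHandler(object):
  def __init__(self, multiplexer, request, client_address, server):
    # The server supplies the last three arguments; partial supplies the first.
    self.multiplexer = multiplexer
    self.request = request


factory = functools.partial(DemoHandler, FakeMultiplexer())
handler = factory('fake-request', ('127.0.0.1', 0), 'fake-server')
print(handler.multiplexer.Runs())  # ['run1', 'run2']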
Example #2
def l1_regularizer(scale):
    """Returns a function that can be used to apply L1 regularization to weights.

  L1 regularization encourages sparsity.

  Args:
    scale: A scalar multiplier `Tensor`. 0.0 disables the regularizer.

  Returns:
    A function with signature `l1(weights, name=None)` that applies L1
    regularization.

  Raises:
    ValueError: If scale is outside of the range [0.0, 1.0] or if scale is not a
    float.
  """
    if isinstance(scale, numbers.Integral):
        raise ValueError("scale cannot be an integer: %s" % scale)
    if isinstance(scale, numbers.Real):
        if scale < 0.0:
            raise ValueError("Setting a scale less than 0 on a regularizer: %g" % scale)
        if scale >= 1.0:
            raise ValueError("Setting a scale greater than 1 on a regularizer: %g" % scale)
        if scale == 0.0:
            logging.info("Scale of 0 disables regularizer.")
            return lambda _, name=None: None

    def l1(weights, name=None):
        """Applies L1 regularization to weights."""
        with ops.op_scope([weights], name, "l1_regularizer") as scope:
            my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name="scale")
            return standard_ops.mul(my_scale, standard_ops.reduce_sum(standard_ops.abs(weights)), name=scope)

    return l1
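The same closure pattern reduced to plain NumPy, so it runs outside TensorFlow (a rough sketch of the idea, not the library implementation): the outer function validates scale once, and the returned l1() computes scale * sum(|weights|).

import numpy as np


def np_l1_regularizer(scale):
  if scale < 0.0:
    raise ValueError('scale must be non-negative: %g' % scale)
  if scale == 0.0:
    return lambda weights: 0.0  # a zero scale disables the penalty
  def l1(weights):
    return scale * np.sum(np.abs(weights))
  return l1


l1 = np_l1_regularizer(0.01)
print(l1(np.array([-2.0, 1.0, 3.0])))  # 0.01 * (2 + 1 + 3) ≈ 0.06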
Example #3
    def testParsingReaderOp(self):
        # Runs the reader over the test input for two epochs.
        num_steps_a = 0
        num_actions = 0
        num_word_ids = 0
        num_tag_ids = 0
        num_label_ids = 0
        batch_size = 10
        with self.test_session() as sess:
            (words, tags,
             labels), epochs, gold_actions = (gen_parser_ops.gold_parse_reader(
                 self._task_context,
                 3,
                 batch_size,
                 corpus_name='training-corpus'))
            while True:
                tf_gold_actions, tf_epochs, tf_words, tf_tags, tf_labels = (
                    sess.run([gold_actions, epochs, words, tags, labels]))
                num_steps_a += 1
                num_actions = max(num_actions, max(tf_gold_actions) + 1)
                num_word_ids = max(num_word_ids, self.GetMaxId(tf_words) + 1)
                num_tag_ids = max(num_tag_ids, self.GetMaxId(tf_tags) + 1)
                num_label_ids = max(num_label_ids,
                                    self.GetMaxId(tf_labels) + 1)
                self.assertIn(tf_epochs, [0, 1, 2])
                if tf_epochs > 1:
                    break

        # Runs the reader again, this time with a lot of added graph nodes.
        num_steps_b = 0
        with self.test_session() as sess:
            num_features = [6, 6, 4]
            num_feature_ids = [num_word_ids, num_tag_ids, num_label_ids]
            embedding_sizes = [8, 8, 8]
            hidden_layer_sizes = [32, 32]
            # Here we aim to test the iteration of the reader op in a complex network,
            # not the GraphBuilder.
            parser = graph_builder.GreedyParser(num_actions, num_features,
                                                num_feature_ids,
                                                embedding_sizes,
                                                hidden_layer_sizes)
            parser.AddTraining(self._task_context,
                               batch_size,
                               corpus_name='training-corpus')
            sess.run(parser.inits.values())
            while True:
                tf_epochs, tf_cost, _ = sess.run([
                    parser.training['epochs'], parser.training['cost'],
                    parser.training['train_op']
                ])
                num_steps_b += 1
                self.assertGreaterEqual(tf_cost, 0)
                self.assertIn(tf_epochs, [0, 1, 2])
                if tf_epochs > 1:
                    break

        # Assert that the two runs made the exact same number of steps.
        logging.info('Number of steps in the two runs: %d, %d', num_steps_a,
                     num_steps_b)
        self.assertEqual(num_steps_a, num_steps_b)
Example #4
def l2_regularizer(scale):
    """Returns a function that can be used to apply L2 regularization to weights.

  Small values of L2 can help prevent overfitting the training data.

  Args:
    scale: A scalar multiplier `Tensor`. 0.0 disables the regularizer.

  Returns:
    A function with signature `l2(weights, name=None)` that applies L2
    regularization.

  Raises:
    ValueError: If scale is outside of the range [0.0, 1.0] or if scale is not a
    float.
  """
    if isinstance(scale, numbers.Integral):
        raise ValueError("scale cannot be an integer: %s" % (scale,))
    if isinstance(scale, numbers.Real):
        if scale < 0.0:
            raise ValueError("Setting a scale less than 0 on a regularizer: %g." % scale)
        if scale >= 1.0:
            raise ValueError("Setting a scale greater than 1 on a regularizer: %g." % scale)
        if scale == 0.0:
            logging.info("Scale of 0 disables regularizer.")
            return lambda _, name=None: None

    def l2(weights, name=None):
        """Applies l2 regularization to weights."""
        with ops.op_scope([weights], name, "l2_regularizer") as scope:
            my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name="scale")
            return standard_ops.mul(my_scale, nn.l2_loss(weights), name=scope)

    return l2
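For comparison with the L1 version, the closure here multiplies scale by nn.l2_loss(weights), and l2_loss computes sum(w ** 2) / 2. A quick NumPy check of that arithmetic (illustrative only):

import numpy as np

w = np.array([-2.0, 1.0, 3.0])
scale = 0.01
print(scale * np.sum(w ** 2) / 2.0)  # 0.01 * (4 + 1 + 9) / 2 ≈ 0.07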
Example #5
  def wait_for_session(self, master, config=None):
    """Creates a new `Session` and waits for model to be ready.

    Creates a new `Session` on 'master'.  Waits for the model to be
    initialized or recovered from a checkpoint.  It's expected that
    another thread or process will make the model ready, and that this
    is intended to be used by threads/processes that participate in a
    distributed training configuration where a different thread/process
    is responsible for initializing or recovering the model being trained.

    Args:
      master: `String` representation of the TensorFlow master to use.
      config: Optional ConfigProto proto used to configure the session.

    Returns:
      sess: A `Session`.
    """
    target = self._maybe_launch_in_process_server(master)
    sess = session.Session(target, graph=self._graph, config=config)
    if self._local_init_op:
      sess.run([self._local_init_op])
    while True:
      not_ready = self._model_not_ready(sess)
      if not not_ready:
        break
      self._safe_close(sess)
      logging.info("Waiting for model to be ready: %s", not_ready)
      time.sleep(self._recovery_wait_secs)
      sess = session.Session(target, graph=self._graph, config=config)

    return sess
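The method above is essentially a poll-until-ready loop: build a session, ask whether the model is ready, and if not, close it, sleep, and retry. A framework-free sketch of that loop; wait_until_ready and its arguments are illustrative, not a TensorFlow API:

import time


def wait_until_ready(make_resource, not_ready_reason, wait_secs=1.0):
  """Rebuilds the resource until not_ready_reason(resource) is falsy."""
  while True:
    resource = make_resource()
    reason = not_ready_reason(resource)
    if not reason:
      return resource
    print('Waiting, resource not ready: %s' % reason)
    time.sleep(wait_secs)


attempts = {'n': 0}


def make():
  # Pretend the resource only becomes ready on the third attempt.
  attempts['n'] += 1
  return attempts['n']


ready = wait_until_ready(make, lambda r: None if r >= 3 else 'still warming up',
                         wait_secs=0.01)
print(ready)  # 3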
Example #6
 def Load():
   for (path, name) in six.iteritems(path_to_run):
     logging.info('Checking for new runs in %s', path)
     multiplexer.AddRunsFromDirectory(path, name)
   t = threading.Timer(interval_secs, Load)
   t.daemon = True
   t.start()
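Load() above reschedules itself with a daemonized threading.Timer, which gives a simple periodic background task without a dedicated loop thread. A standalone sketch of that pattern, with the interval shortened so the demo finishes quickly:

import threading
import time


def periodic_task(interval_secs, counter):
  counter['ticks'] += 1
  t = threading.Timer(interval_secs, periodic_task, args=(interval_secs, counter))
  t.daemon = True  # do not keep the process alive just for this timer
  t.start()


counter = {'ticks': 0}
periodic_task(0.05, counter)
time.sleep(0.2)
print('ticks so far:', counter['ticks'])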
Example #7
    def _serve_static_file(self, path):
        """Serves the static file located at the given path.

    Args:
      path: The path of the static file, relative to the tensorboard/ directory.
    """
        # Strip off the leading forward slash.
        path = path.lstrip('/')
        if not self._path_is_safe(path):
            logging.info('path %s not safe, sending 404', path)
            # Traversal attack, so 404.
            self.send_error(404)
            return

        if path.startswith('external'):
            path = os.path.join('../', path)
        else:
            path = os.path.join('tensorboard', path)
        # Open the file and read it.
        try:
            contents = resource_loader.load_resource(path)
        except IOError:
            logging.info('path %s not found, sending 404', path)
            self.send_error(404)
            return

        self.send_response(200)

        mimetype = mimetypes.guess_type(path)[0] or 'application/octet-stream'
        self.send_header('Content-Type', mimetype)
        self.end_headers()
        self.wfile.write(contents)
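_path_is_safe itself is not shown in this snippet; a common way to implement such a check is to normalize the joined path and confirm it stays under the serving directory, which rejects '../' traversal. A hypothetical sketch (the 'static' base directory is made up for illustration):

import posixpath


def path_is_safe(path):
  """Returns True if `path` cannot escape the serving directory."""
  base = 'static'
  joined = posixpath.normpath(posixpath.join(base, path.lstrip('/')))
  return joined == base or joined.startswith(base + '/')


print(path_is_safe('js/app.js'))       # True
print(path_is_safe('../secrets.txt'))  # False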
Example #8
  def _serve_static_file(self, path):
    """Serves the static file located at the given path.

    Args:
      path: The path of the static file, relative to the tensorboard/ directory.
    """
    # Strip off the leading forward slash.
    path = path.lstrip('/')
    if not self._path_is_safe(path):
      logging.info('path %s not safe, sending 404', path)
      # Traversal attack, so 404.
      self.send_error(404)
      return

    if path.startswith('external'):
      path = os.path.join('../', path)
    else:
      path = os.path.join('tensorboard', path)
    # Open the file and read it.
    try:
      contents = resource_loader.load_resource(path)
    except IOError:
      logging.info('path %s not found, sending 404', path)
      self.send_error(404)
      return

    self.send_response(200)

    mimetype = mimetypes.guess_type(path)[0] or 'application/octet-stream'
    self.send_header('Content-Type', mimetype)
    self.end_headers()
    self.wfile.write(contents)
Example #9
 def Load():
     for (path, name) in six.iteritems(path_to_run):
         logging.info('Checking for new runs in %s', path)
         multiplexer.AddRunsFromDirectory(path, name)
     t = threading.Timer(interval_secs, Load)
     t.daemon = True
     t.start()
Example #10
def ListRecursively(top):
  """Walks a directory tree, yielding (dir_path, file_paths) tuples.

  For |top| and each of its subdirectories, yields a tuple containing the path
  to the directory and the path to each of the contained files.  Note that
  unlike os.Walk()/gfile.Walk(), this does not list subdirectories and the file
  paths are all absolute.

  Args:
    top: A path to a GCS directory.
  Returns:
    A list of (dir_path, file_paths) tuples.

  """
  if top.endswith('/'):
    wildcard = top + '**'
  else:
    wildcard = top + '/**'
  tuples = []
  try:
    file_paths = ListDirectory(wildcard)
  except subprocess.CalledProcessError as e:
    logging.info('%s, assuming it means no files were found', e)
    return []
  for file_path in file_paths:
    dir_path = os.path.dirname(file_path)
    if tuples and tuples[-1][0] == dir_path:
      tuples[-1][1].append(file_path)
    else:
      tuples.append((dir_path, [file_path]))
  return tuples
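The accumulation loop above merges consecutive file paths that share a parent directory into one (dir_path, file_paths) tuple. Assuming, as that loop does, that the listing returns paths grouped by directory, itertools.groupby builds the same structure; a standalone sketch:

import itertools
import os

file_paths = ['/logs/run1/a.events', '/logs/run1/b.events', '/logs/run2/c.events']
tuples = [(dir_path, list(paths))
          for dir_path, paths in itertools.groupby(file_paths, key=os.path.dirname)]
print(tuples)
# [('/logs/run1', ['/logs/run1/a.events', '/logs/run1/b.events']),
#  ('/logs/run2', ['/logs/run2/c.events'])]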
Example #11
    def wait_for_session(self, master, config=None):
        """Creates a new `Session` and waits for model to be ready.

    Creates a new `Session` on 'master'.  Waits for the model to be
    initialized or recovered from a checkpoint.  It's expected that
    another thread or process will make the model ready, and that this
    is intended to be used by threads/processes that participate in a
    distributed training configuration where a different thread/process
    is responsible for initializing or recovering the model being trained.

    Args:
      master: `String` representation of the TensorFlow master to use.
      config: Optional ConfigProto proto used to configure the session.

    Returns:
      sess: A `Session`.
    """
        target = self._maybe_launch_in_process_server(master)
        sess = session.Session(target, graph=self._graph, config=config)
        if self._local_init_op:
            sess.run([self._local_init_op])
        while True:
            not_ready = self._model_not_ready(sess)
            if not not_ready:
                break
            self._safe_close(sess)
            logging.info("Waiting for model to be ready: %s", not_ready)
            time.sleep(self._recovery_wait_secs)
            sess = session.Session(target, graph=self._graph, config=config)

        return sess
Example #12
 def predicate(e):
     err_str = e.message
     op = e.op
     while op is not None:
         err_str += "\nCaused by: " + op.name
         op = op._original_op
     logging.info("Searching within error strings: '%s' within '%s'", expected_err_re_or_predicate, err_str)
     return re.search(expected_err_re_or_predicate, err_str)
Example #13
 def testParseUntilNotAlive(self):
   """Ensures that the 'alive' condition works in the Cond ops."""
   with self.test_session(graph=tf.Graph()) as sess:
     t = self.MakeGraph(batch_size=3, beam_size=2, max_steps=5).training
     sess.run(t['inits'])
     for i in range(5):
       logging.info('run %d', i)
       tf_alive = t['alive'].eval()
       self.assertFalse(any(tf_alive))
Example #14
 def testCoNLLFormat(self):
   self.WriteContext('conll-sentence')
   logging.info('Writing conll file to: %s', self.corpus_file)
   with open(self.corpus_file, 'w') as f:
     f.write((CONLL_DOC1 + u'\n\n' + CONLL_DOC2 + u'\n')
             .replace(' ', '\t').encode('utf-8'))
   self.ValidateDocuments()
   self.BuildLexicon()
   self.ValidateTagToCategoryMap()
Example #15
 def predicate(e):
   err_str = e.message
   op = e.op
   while op is not None:
     err_str += "\nCaused by: " + op.name
     op = op._original_op
   logging.info("Searching within error strings: '%s' within '%s'",
                expected_err_re_or_predicate, err_str)
   return re.search(expected_err_re_or_predicate, err_str)
Example #16
    def wait_for_session(self,
                         master,
                         config=None,
                         max_wait_secs=float("Inf")):
        """Creates a new `Session` and waits for model to be ready.

    Creates a new `Session` on 'master'.  Waits for the model to be
    initialized or recovered from a checkpoint.  It's expected that
    another thread or process will make the model ready, and that this
    is intended to be used by threads/processes that participate in a
    distributed training configuration where a different thread/process
    is responsible for initializing or recovering the model being trained.

    NB: The amount of time this method waits for the session is bounded
    by max_wait_secs. By default, this function will wait indefinitely.

    Args:
      master: `String` representation of the TensorFlow master to use.
      config: Optional ConfigProto proto used to configure the session.
      max_wait_secs: Maximum time to wait for the session to become available.

    Returns:
      A `Session`. May be None if the operation exceeds the timeout
      specified by config.operation_timeout_in_ms.

    Raises:
      tf.DeadlineExceededError: if the session is not available after
        max_wait_secs.
    """
        target = self._maybe_launch_in_process_server(master)

        if max_wait_secs is None:
            max_wait_secs = float("Inf")
        timer = _CountDownTimer(max_wait_secs)

        while True:
            sess = session.Session(target, graph=self._graph, config=config)
            if self._local_init_op:
                sess.run([self._local_init_op])
            not_ready = self._model_not_ready(sess)
            if not not_ready:
                return sess

            self._safe_close(sess)

            # Do we have enough time left to try again?
            remaining_ms_after_wait = (timer.secs_remaining() -
                                       self._recovery_wait_secs)
            if remaining_ms_after_wait < 0:
                raise errors.DeadlineExceededError(
                    None, None,
                    "Session was not ready after waiting %d secs." %
                    (max_wait_secs, ))

            logging.info("Waiting for model to be ready: %s", not_ready)
            time.sleep(self._recovery_wait_secs)
Example #17
 def WriteContext(self, corpus_format):
   context = task_spec_pb2.TaskSpec()
   self.AddInput('documents', self.corpus_file, corpus_format, context)
   for name in ('word-map', 'lcword-map', 'tag-map',
                'category-map', 'label-map', 'prefix-table',
                'suffix-table', 'tag-to-category'):
     self.AddInput(name, os.path.join(FLAGS.test_tmpdir, name), '', context)
   logging.info('Writing context to: %s', self.context_file)
   with open(self.context_file, 'w') as f:
     f.write(str(context))
Example #18
 def _Load():
   start = time.time()
   for (path, name) in six.iteritems(path_to_run):
     multiplexer.AddRunsFromDirectory(path, name)
   multiplexer.Reload()
   duration = time.time() - start
   logging.info('Multiplexer done loading. Load took %0.1f secs', duration)
   t = threading.Timer(LOAD_INTERVAL, _Load)
   t.daemon = True
   t.start()
Example #19
 def WriteContext(self, corpus_format):
   context = task_spec_pb2.TaskSpec()
   self.AddInput('documents', self.corpus_file, corpus_format, context)
   for name in ('word-map', 'lcword-map', 'tag-map',
                'category-map', 'label-map', 'prefix-table',
                'suffix-table', 'tag-to-category'):
     self.AddInput(name, os.path.join(FLAGS.test_tmpdir, name), '', context)
   logging.info('Writing context to: %s', self.context_file)
   with open(self.context_file, 'w') as f:
     f.write(str(context))
Example #20
 def _Load():
   start = time.time()
   for (path, name) in six.iteritems(path_to_run):
     multiplexer.AddRunsFromDirectory(path, name)
   multiplexer.Reload()
   duration = time.time() - start
   logging.info('Multiplexer done loading. Load took %0.1f secs', duration)
   t = threading.Timer(LOAD_INTERVAL, _Load)
   t.daemon = True
   t.start()
Example #21
def convert_variables_to_constants(sess, input_graph_def, output_node_names):
  """Replaces all the variables in a graph with constants of the same values.

  If you have a trained graph containing Variable ops, it can be convenient to
  convert them all to Const ops holding the same values. This makes it possible
  to describe the network fully with a single GraphDef file, and allows the
  removal of a lot of ops related to loading and saving the variables.

  Args:
    sess: Active TensorFlow session containing the variables.
    input_graph_def: GraphDef object holding the network.
    output_node_names: List of name strings for the result nodes of the graph.

  Returns:
    GraphDef containing a simplified version of the original.
  """
  found_variables = {}
  variable_names = []
  variable_dict_names = []
  for node in input_graph_def.node:
    if node.op == "Assign":
      variable_name = node.input[0]
      variable_dict_names.append(variable_name)
      variable_names.append(variable_name + ":0")
  if variable_names:
    returned_variables = sess.run(variable_names)
  else:
    returned_variables = []
  found_variables = dict(zip(variable_dict_names, returned_variables))
  logging.info("Frozen %d variables." % len(returned_variables))

  # This graph only includes the nodes needed to evaluate the output nodes, and
  # removes unneeded nodes like those involved in saving and assignment.
  inference_graph = extract_sub_graph(input_graph_def, output_node_names)

  output_graph_def = graph_pb2.GraphDef()
  how_many_converted = 0
  for input_node in inference_graph.node:
    output_node = graph_pb2.NodeDef()
    if input_node.name in found_variables:
      output_node.op = "Const"
      output_node.name = input_node.name
      dtype = input_node.attr["dtype"]
      data = found_variables[input_node.name]
      output_node.attr["dtype"].CopyFrom(dtype)
      output_node.attr["value"].CopyFrom(attr_value_pb2.AttrValue(
          tensor=tensor_util.make_tensor_proto(data,
                                               dtype=dtype.type,
                                               shape=data.shape)))
      how_many_converted += 1
    else:
      output_node.CopyFrom(input_node)
    output_graph_def.node.extend([output_node])
  print("Converted %d variables to const ops." % how_many_converted)
  return output_graph_def
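Stripped of the GraphDef and protobuf details, the function above does two things: read the current value of every variable node, then emit a copy of the graph in which those nodes become constants holding those values. A toy, dictionary-based sketch of the idea (not the real GraphDef machinery):

def freeze_toy_graph(nodes, variable_values):
  """nodes: list of dicts with 'name' and 'op'; variable_values: name -> value."""
  frozen = []
  for node in nodes:
    if node['name'] in variable_values:
      frozen.append({'name': node['name'], 'op': 'Const',
                     'value': variable_values[node['name']]})
    else:
      frozen.append(dict(node))  # pass non-variable nodes through unchanged
  return frozen


nodes = [{'name': 'weights', 'op': 'Variable'}, {'name': 'matmul', 'op': 'MatMul'}]
print(freeze_toy_graph(nodes, {'weights': [[1.0, 2.0]]}))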
Example #22
 def CheckTokenization(self, sentence, tokenization):
   self.WriteContext('english-text')
   logging.info('Writing text file to: %s', self.corpus_file)
   with open(self.corpus_file, 'w') as f:
     f.write(sentence)
   sentence, _ = gen_parser_ops.document_source(
       self.context_file, batch_size=1)
   with self.test_session() as sess:
     sentence_doc = self.ReadNextDocument(sess, sentence)
     self.assertEqual(' '.join([t.word for t in sentence_doc.token]),
                      tokenization)
Example #23
def convert_variables_to_constants(sess, input_graph_def, output_node_names):
    """Replaces all the variables in a graph with constants of the same values.

  If you have a trained graph containing Variable ops, it can be convenient to
  convert them all to Const ops holding the same values. This makes it possible
  to describe the network fully with a single GraphDef file, and allows the
  removal of a lot of ops related to loading and saving the variables.

  Args:
    sess: Active TensorFlow session containing the variables.
    input_graph_def: GraphDef object holding the network.
    output_node_names: List of name strings for the result nodes of the graph.

  Returns:
    GraphDef containing a simplified version of the original.
  """
    found_variables = {}
    variable_names = []
    variable_dict_names = []
    for node in input_graph_def.node:
        if node.op == "Assign":
            variable_name = node.input[0]
            variable_dict_names.append(variable_name)
            variable_names.append(variable_name + ":0")
    if variable_names:
        returned_variables = sess.run(variable_names)
    else:
        returned_variables = []
    found_variables = dict(zip(variable_dict_names, returned_variables))
    logging.info("Frozen %d variables." % len(returned_variables))

    # This graph only includes the nodes needed to evaluate the output nodes, and
    # removes unneeded nodes like those involved in saving and assignment.
    inference_graph = extract_sub_graph(input_graph_def, output_node_names)

    output_graph_def = graph_pb2.GraphDef()
    how_many_converted = 0
    for input_node in inference_graph.node:
        output_node = graph_pb2.NodeDef()
        if input_node.name in found_variables:
            output_node.op = "Const"
            output_node.name = input_node.name
            dtype = input_node.attr["dtype"]
            data = found_variables[input_node.name]
            output_node.attr["dtype"].CopyFrom(dtype)
            output_node.attr["value"].CopyFrom(
                attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
                    data, dtype=dtype.type, shape=data.shape)))
            how_many_converted += 1
        else:
            output_node.CopyFrom(input_node)
        output_graph_def.node.extend([output_node])
    print("Converted %d variables to const ops." % how_many_converted)
    return output_graph_def
Example #24
 def CheckTokenization(self, sentence, tokenization):
     self.WriteContext('english-text')
     logging.info('Writing text file to: %s', self.corpus_file)
     with open(self.corpus_file, 'w') as f:
         f.write(sentence)
     sentence, _ = gen_parser_ops.document_source(self.context_file,
                                                  batch_size=1)
     with self.test_session() as sess:
         sentence_doc = self.ReadNextDocument(sess, sentence)
         self.assertEqual(' '.join([t.word for t in sentence_doc.token]),
                          tokenization)
Example #25
  def AddRunsFromDirectory(self, path, name=None):
    """Load runs from a directory; recursively walks subdirectories.

    If path doesn't exist, no-op. This ensures that it is safe to call
      `AddRunsFromDirectory` multiple times, even before the directory is made.

    If path is a directory, load event files in the directory (if any exist) and
      recursively call AddRunsFromDirectory on any subdirectories. This means you
      can call AddRunsFromDirectory at the root of a tree of event logs and
      TensorBoard will load them all.

    If the `EventMultiplexer` is already loaded this will cause
    the newly created accumulators to `Reload()`.
    Args:
      path: A string path to a directory to load runs from.
      name: Optionally, what name to apply to the runs. If name is provided
        and the directory contains run subdirectories, the name of each subrun
        is the concatenation of the parent name and the subdirectory name. If
        name is provided and the directory contains event files, then a run
        called "name" is added, containing the events from the path.

    Raises:
      ValueError: If the path exists and isn't a directory.

    Returns:
      The `EventMultiplexer`.
    """
    subdirs = []
    if gcs.IsGCSPath(path):
      subdirs = [
          subdir
          for (subdir, files) in gcs.ListRecursively(path)
          if list(filter(event_accumulator.IsTensorFlowEventsFile, files))
      ]
    else:
      if not gfile.Exists(path):
        return  # Maybe it hasn't been created yet, fail silently to retry later
      if not gfile.IsDirectory(path):
        raise ValueError('AddRunsFromDirectory: path exists and is not a '
                         'directory, %s' % path)
      subdirs = [
          subdir
          for (subdir, _, files) in gfile.Walk(path)
          if list(filter(event_accumulator.IsTensorFlowEventsFile, files))
      ]

    for subdir in subdirs:
      logging.info('Adding events from directory %s', subdir)
      rpath = os.path.relpath(subdir, path)
      subname = os.path.join(name, rpath) if name else rpath
      self.AddRun(subdir, name=subname)

    return self
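The gfile.Walk branch above keeps only the subdirectories that directly contain at least one TensorFlow events file. The same selection with the standard library, assuming the usual 'tfevents' filename convention purely for illustration:

import os


def subdirs_with_event_files(root):
  return [dirpath
          for dirpath, _, filenames in os.walk(root)
          if any('tfevents' in f for f in filenames)]


# Example call (requires the directory to exist):
# print(subdirs_with_event_files('/tmp/logs'))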
Example #26
    def AddRunsFromDirectory(self, path, name=None):
        """Load runs from a directory; recursively walks subdirectories.

    If path doesn't exist, no-op. This ensures that it is safe to call
      `AddRunsFromDirectory` multiple times, even before the directory is made.

    If path is a directory, load event files in the directory (if any exist) and
      recursively call AddRunsFromDirectory on any subdirectories. This means you
      can call AddRunsFromDirectory at the root of a tree of event logs and
      TensorBoard will load them all.

    If the `EventMultiplexer` is already loaded this will cause
    the newly created accumulators to `Reload()`.
    Args:
      path: A string path to a directory to load runs from.
      name: Optionally, what name to apply to the runs. If name is provided
        and the directory contains run subdirectories, the name of each subrun
        is the concatenation of the parent name and the subdirectory name. If
        name is provided and the directory contains event files, then a run
        called "name" is added, containing the events from the path.

    Raises:
      ValueError: If the path exists and isn't a directory.

    Returns:
      The `EventMultiplexer`.
    """
        subdirs = []
        if gcs.IsGCSPath(path):
            subdirs = [
                subdir
                for (subdir, files) in gcs.ListRecursively(path) if list(
                    filter(event_accumulator.IsTensorFlowEventsFile, files))
            ]
        else:
            if not gfile.Exists(path):
                return  # Maybe it hasn't been created yet, fail silently to retry later
            if not gfile.IsDirectory(path):
                raise ValueError(
                    'AddRunsFromDirectory: path exists and is not a '
                    'directory, %s' % path)
            subdirs = [
                subdir for (subdir, _, files) in gfile.Walk(path) if list(
                    filter(event_accumulator.IsTensorFlowEventsFile, files))
            ]

        for subdir in subdirs:
            logging.info('Adding events from directory %s', subdir)
            rpath = os.path.relpath(subdir, path)
            subname = os.path.join(name, rpath) if name else rpath
            self.AddRun(subdir, name=subname)

        return self
Example #27
    def Load(self):
        """Loads new values from disk.

        The watcher will load from one file at a time; as soon as that file stops
        yielding events, it will move on to the next file. We assume that old files
        are never modified after a newer file has been written. As a result, Load()
        can be called multiple times in a row without losing events that have not
        been yielded yet. In other words, we guarantee that every event will be
        yielded exactly once.

        Yields:
          All values that were written to disk that have not been yielded yet.
        """

        # If the loader exists, check it for a value.
        if not self._loader:
            self._InitializeLoader()

        while True:
            # Yield all the new events in the file we're currently loading from.
            for event in self._loader.Load():
                yield event

            next_path = self._GetNextPath()
            if not next_path:
                logging.info('No more files in %s', self._directory)
                # Current file is empty and there are no new files, so we're done.
                return

            # There's a new file, so check to make sure there weren't any events
            # written between when we finished reading the current file and when we
            # checked for the new one. The sequence of events might look something
            # like this:
            #
            # 1. Event #1 written to file #1.
            # 2. We check for events and yield event #1 from file #1
            # 3. We check for events and see that there are no more events in file #1.
            # 4. Event #2 is written to file #1.
            # 5. Event #3 is written to file #2.
            # 6. We check for a new file and see that file #2 exists.
            #
            # Without this loop, we would miss event #2. We're also guaranteed by the
            # loader contract that no more events will be written to file #1 after
            # events start being written to file #2, so we don't have to worry about
            # that.
            for event in self._loader.Load():
                yield event

            logging.info('Directory watcher for %s advancing to file %s',
                         self._directory, next_path)

            # Advance to the next file and start over.
            self._SetPath(next_path)
Example #28
  def Load(self):
    """Loads new values from disk.

    The watcher will load from one file at a time; as soon as that file stops
    yielding events, it will move on to the next file. We assume that old files
    are never modified after a newer file has been written. As a result, Load()
    can be called multiple times in a row without losing events that have not
    been yielded yet. In other words, we guarantee that every event will be
    yielded exactly once.

    Yields:
      All values that were written to disk that have not been yielded yet.
    """

    # If the loader exists, check it for a value.
    if not self._loader:
      self._InitializeLoader()

    while True:
      # Yield all the new events in the file we're currently loading from.
      for event in self._loader.Load():
        yield event

      next_path = self._GetNextPath()
      if not next_path:
        logging.info('No more files in %s', self._directory)
        # Current file is empty and there are no new files, so we're done.
        return

      # There's a new file, so check to make sure there weren't any events
      # written between when we finished reading the current file and when we
      # checked for the new one. The sequence of events might look something
      # like this:
      #
      # 1. Event #1 written to file #1.
      # 2. We check for events and yield event #1 from file #1
      # 3. We check for events and see that there are no more events in file #1.
      # 4. Event #2 is written to file #1.
      # 5. Event #3 is written to file #2.
      # 6. We check for a new file and see that file #2 exists.
      #
      # Without this loop, we would miss event #2. We're also guaranteed by the
      # loader contract that no more events will be written to file #1 after
      # events start being written to file #2, so we don't have to worry about
      # that.
      for event in self._loader.Load():
        yield event

      logging.info('Directory watcher for %s advancing to file %s',
                   self._directory, next_path)

      # Advance to the next file and start over.
      self._SetPath(next_path)
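A stripped-down sketch of the 'drain, check for a successor, drain again, then advance' pattern that the comment block above describes, with in-memory lists standing in for event files:

def watch(files):
  """files: a list of lists; each inner list is one 'file' of pending events."""
  index = 0
  while True:
    while files[index]:
      yield files[index].pop(0)  # drain the current file
    if index + 1 >= len(files):
      return                     # no newer file, so stop for now
    while files[index]:
      yield files[index].pop(0)  # re-check for late writes before advancing
    index += 1                   # advance to the next file


print(list(watch([[1, 2], [3], [4, 5]])))  # [1, 2, 3, 4, 5]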
Example #29
  def wait_for_session(self, master, config=None, max_wait_secs=float("Inf")):
    """Creates a new `Session` and waits for model to be ready.

    Creates a new `Session` on 'master'.  Waits for the model to be
    initialized or recovered from a checkpoint.  It's expected that
    another thread or process will make the model ready, and that this
    is intended to be used by threads/processes that participate in a
    distributed training configuration where a different thread/process
    is responsible for initializing or recovering the model being trained.

    NB: The amount of time this method waits for the session is bounded
    by max_wait_secs. By default, this function will wait indefinitely.

    Args:
      master: `String` representation of the TensorFlow master to use.
      config: Optional ConfigProto proto used to configure the session.
      max_wait_secs: Maximum time to wait for the session to become available.

    Returns:
      A `Session`. May be None if the operation exceeds the timeout
      specified by config.operation_timeout_in_ms.

    Raises:
      tf.DeadlineExceededError: if the session is not available after
        max_wait_secs.
    """
    target = self._maybe_launch_in_process_server(master)

    if max_wait_secs is None:
      max_wait_secs = float("Inf")
    timer = _CountDownTimer(max_wait_secs)

    while True:
      sess = session.Session(target, graph=self._graph, config=config)
      if self._local_init_op:
        sess.run([self._local_init_op])
      not_ready = self._model_not_ready(sess)
      if not not_ready:
        return sess

      self._safe_close(sess)

      # Do we have enough time left to try again?
      remaining_ms_after_wait = (
          timer.secs_remaining() - self._recovery_wait_secs)
      if remaining_ms_after_wait < 0:
        raise errors.DeadlineExceededError(
            None, None,
            "Session was not ready after waiting %d secs." % (max_wait_secs,))

      logging.info("Waiting for model to be ready: %s", not_ready)
      time.sleep(self._recovery_wait_secs)
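_CountDownTimer is referenced but not defined in this snippet; a minimal stand-in exposing the one method the loop uses, secs_remaining(), might look like this (hypothetical, for illustration only):

import time


class CountDownTimer(object):
  def __init__(self, duration_secs):
    self._start = time.time()
    self._duration_secs = duration_secs

  def secs_remaining(self):
    # Clamp at zero so callers never see a negative remaining time.
    return max(0.0, self._duration_secs - (time.time() - self._start))


timer = CountDownTimer(5.0)
print(timer.secs_remaining() <= 5.0)  # True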
Example #30
    def testParsingReaderOp(self):
        # Runs the reader over the test input for two epochs.
        num_steps_a = 0
        num_actions = 0
        num_word_ids = 0
        num_tag_ids = 0
        num_label_ids = 0
        batch_size = 10
        with self.test_session() as sess:
            (words, tags, labels), epochs, gold_actions = gen_parser_ops.gold_parse_reader(
                self._task_context, 3, batch_size, corpus_name="training-corpus"
            )
            while True:
                tf_gold_actions, tf_epochs, tf_words, tf_tags, tf_labels = sess.run(
                    [gold_actions, epochs, words, tags, labels]
                )
                num_steps_a += 1
                num_actions = max(num_actions, max(tf_gold_actions) + 1)
                num_word_ids = max(num_word_ids, self.GetMaxId(tf_words) + 1)
                num_tag_ids = max(num_tag_ids, self.GetMaxId(tf_tags) + 1)
                num_label_ids = max(num_label_ids, self.GetMaxId(tf_labels) + 1)
                self.assertIn(tf_epochs, [0, 1, 2])
                if tf_epochs > 1:
                    break

        # Runs the reader again, this time with a lot of added graph nodes.
        num_steps_b = 0
        with self.test_session() as sess:
            num_features = [6, 6, 4]
            num_feature_ids = [num_word_ids, num_tag_ids, num_label_ids]
            embedding_sizes = [8, 8, 8]
            hidden_layer_sizes = [32, 32]
            # Here we aim to test the iteration of the reader op in a complex network,
            # not the GraphBuilder.
            parser = graph_builder.GreedyParser(
                num_actions, num_features, num_feature_ids, embedding_sizes, hidden_layer_sizes
            )
            parser.AddTraining(self._task_context, batch_size, corpus_name="training-corpus")
            sess.run(parser.inits.values())
            while True:
                tf_epochs, tf_cost, _ = sess.run(
                    [parser.training["epochs"], parser.training["cost"], parser.training["train_op"]]
                )
                num_steps_b += 1
                self.assertGreaterEqual(tf_cost, 0)
                self.assertIn(tf_epochs, [0, 1, 2])
                if tf_epochs > 1:
                    break

        # Assert that the two runs made the exact same number of steps.
        logging.info("Number of steps in the two runs: %d, %d", num_steps_a, num_steps_b)
        self.assertEqual(num_steps_a, num_steps_b)
Example #31
def pin_to_cpu(op):
  """Returns a CPU device for the given node."""
  device = op.device if op.device is not None else ""
  dev = pydev.from_string(device)

  if not dev.device_type:
    return set_cpu0(device)
  if dev.device_type == "CPU":
    return device

  logging.info("Operation %s has been assigned to a non-CPU (%s), so "
               "it will not be pinned to the CPU.", op.name, dev.device_type)
  return device
Example #32
def pin_to_cpu(op):
  """Returns a CPU device for the given node."""
  device = op.device if op.device is not None else ""
  dev = pydev.from_string(device)

  if not dev.device_type:
    return set_cpu0(device)
  if dev.device_type == "CPU":
    return device

  logging.info("Operation %s has been assigned to a non-CPU (%s), so "
               "it will not be pinned to the CPU.", op.name, dev.device_type)
  return device
Example #33
def ReloadMultiplexer(multiplexer, path_to_run):
  """Loads all runs into the multiplexer.

  Args:
    multiplexer: The `EventMultiplexer` to add runs to and reload.
    path_to_run: A dict mapping from paths to run names, where `None` as the run
      name is interpreted as a run name equal to the path.
  """
  start = time.time()
  for (path, name) in six.iteritems(path_to_run):
    multiplexer.AddRunsFromDirectory(path, name)
  multiplexer.Reload()
  duration = time.time() - start
  logging.info('Multiplexer done loading. Load took %0.1f secs', duration)
Example #34
    def AddRunsFromDirectory(self, path, name=None):
        """Load runs from a directory, assuming each subdirectory is a run.

    If path doesn't exist, no-op. This ensures that it is safe to call
      `AddRunsFromDirectory` multiple times, even before the directory is made.

    If the directory contains TensorFlow event files, it is itself treated as a
      run.

    If the `EventMultiplexer` is already loaded or autoupdating, this will cause
    the newly created accumulators to also `Reload()` or `AutoUpdate()`.

    Args:
      path: A string path to a directory to load runs from.
      name: Optionally, what name to apply to the runs. If name is provided
        and the directory contains run subdirectories, the name of each subrun
        is the concatenation of the parent name and the subdirectory name. If
        name is provided and the directory contains event files, then a run
        called "name" is added, containing the events from the path.

    Raises:
      ValueError: If the path exists and isn't a directory.

    Returns:
      The `EventMultiplexer`.
    """
        if not gfile.Exists(path):
            return  # Maybe it hasn't been created yet, fail silently to retry later
        if not gfile.IsDirectory(path):
            raise ValueError('Path exists and is not a directory, %s' % path)
        paths = gfile.ListDirectory(path)
        is_directory = lambda x: gfile.IsDirectory(os.path.join(path, x))
        subdirectories = filter(is_directory, paths)
        for s in subdirectories:
            if name:
                subname = '/'.join([name, s])
            else:
                subname = s
            self.AddRun(os.path.join(path, s), subname)

        if list(filter(event_accumulator.IsTensorFlowEventsFile, paths)):
            directory_name = os.path.split(path)[1]
            logging.info('Directory %s has event files; loading',
                         directory_name)
            if name:
                dname = name
            else:
                dname = directory_name
            self.AddRun(path, dname)
        return self
Example #35
  def AddRunsFromDirectory(self, path, name=None):
    """Load runs from a directory, assuming each subdirectory is a run.

    If path doesn't exist, no-op. This ensures that it is safe to call
      `AddRunsFromDirectory` multiple times, even before the directory is made.

    If the directory contains TensorFlow event files, it is itself treated as a
      run.

    If the `EventMultiplexer` is already loaded or autoupdating, this will cause
    the newly created accumulators to also `Reload()` or `AutoUpdate()`.

    Args:
      path: A string path to a directory to load runs from.
      name: Optionally, what name to apply to the runs. If name is provided
        and the directory contains run subdirectories, the name of each subrun
        is the concatenation of the parent name and the subdirectory name. If
        name is provided and the directory contains event files, then a run
        called "name" is added, containing the events from the path.

    Raises:
      ValueError: If the path exists and isn't a directory.

    Returns:
      The `EventMultiplexer`.
    """
    if not gfile.Exists(path):
      return  # Maybe it hasn't been created yet, fail silently to retry later
    if not gfile.IsDirectory(path):
      raise ValueError('Path exists and is not a directory, %s'  % path)
    paths = gfile.ListDirectory(path)
    is_directory = lambda x: gfile.IsDirectory(os.path.join(path, x))
    subdirectories = filter(is_directory, paths)
    for s in subdirectories:
      if name:
        subname = '/'.join([name, s])
      else:
        subname = s
      self.AddRun(os.path.join(path, s), subname)

    if list(filter(event_accumulator.IsTensorFlowEventsFile, paths)):
      directory_name = os.path.split(path)[1]
      logging.info('Directory %s has event files; loading', directory_name)
      if name:
        dname = name
      else:
        dname = directory_name
      self.AddRun(path, dname)
    return self
Example #36
    def _call_func(self, args, kwargs, check_for_new_variables):
        try:
            vars_at_start = len(ops.get_collection(ops.GraphKeys.VARIABLES))
            trainable_at_start = len(
                ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))

            result = self._func(*args, **kwargs)
            if check_for_new_variables:
                trainable_variables = ops.get_collection(
                    ops.GraphKeys.TRAINABLE_VARIABLES)
                # If a variable that we intend to train is created as a side effect
                # of creating a template, then that is almost certainly an error.
                if trainable_at_start != len(trainable_variables):
                    raise ValueError(
                        "Trainable variable created when calling a template "
                        "after the first time, perhaps you used tf.Variable "
                        "when you meant tf.get_variable: %s" %
                        (trainable_variables[trainable_at_start:], ))

                # Non-trainable tracking variables are a legitimate reason why a new
                # variable would be created, but it is a relatively advanced use-case,
                # so log it.
                variables = ops.get_collection(ops.GraphKeys.VARIABLES)
                if vars_at_start != len(variables):
                    logging.info(
                        "New variables created when calling a template after "
                        "the first time, perhaps you used tf.Variable when you "
                        "meant tf.get_variable: %s", variables[vars_at_start:])
            return result
        except Exception as exc:
            # Reraise the exception, but append the original definition to the
            # trace.
            args = exc.args
            if not args:
                arg0 = ""
            else:
                arg0 = args[0]
            trace = "".join(
                _skip_common_stack_elements(self._stacktrace,
                                            traceback.format_stack()))
            arg0 = "%s\n\noriginally defined at:\n%s" % (arg0, trace)
            new_args = [arg0]
            new_args.extend(args[1:])
            exc.args = tuple(new_args)
            raise
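The except block above rewrites exc.args to append the template's original definition site before re-raising. The same args-rewriting trick in Python 3 syntax, with a made-up context string in place of the real stack trace:

def call_with_context(func, context_note):
  try:
    return func()
  except Exception as exc:
    arg0 = exc.args[0] if exc.args else ''
    exc.args = ('%s\n\noriginally defined at:\n%s' % (arg0, context_note),) + exc.args[1:]
    raise  # re-raise the original exception with the augmented message


try:
  call_with_context(lambda: 1 / 0, 'my_template.py:42')
except ZeroDivisionError as e:
  print(e.args[0])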
Example #37
  def _AddParam(self,
                shape,
                dtype,
                name,
                initializer=None,
                return_average=False):
    """Add a model parameter w.r.t. we expect to compute gradients.

    _AddParam creates both regular parameters (usually for training) and
    averaged nodes (usually for inference). It returns one or the other based
    on the 'return_average' arg.

    Args:
      shape: int list, tensor shape of the parameter to create
      dtype: tf.DataType, data type of the parameter
      name: string, name of the parameter in the TF graph
      initializer: optional initializer for the parameter
      return_average: if False, return the parameter; otherwise return its
        moving average

    Returns:
      parameter or averaged parameter
    """
    if name not in self.params:
      step = tf.cast(self.GetStep(), tf.float32)
      # Put all parameters and their initializing ops in their own scope
      # irrespective of the current scope (training or eval).
      with tf.name_scope(self._param_scope):
        self.params[name] = tf.get_variable(name, shape, dtype, initializer)
        param = self.params[name]
        if initializer is not None:
          self.inits[name] = state_ops.init_variable(param, initializer)
        if self._averaging_decay == 1:
          logging.info('Using vanilla averaging of parameters.')
          ema = tf.train.ExponentialMovingAverage(decay=(step / (step + 1.0)),
                                                  num_updates=None)
        else:
          ema = tf.train.ExponentialMovingAverage(decay=self._averaging_decay,
                                                  num_updates=step)
        self._averaging[name + '_avg_update'] = ema.apply([param])
        self.variables[name + '_avg_var'] = ema.average(param)
        self.inits[name + '_avg_init'] = state_ops.init_variable(
            ema.average(param), tf.zeros_initializer)
    return (self.variables[name + '_avg_var'] if return_average else
            self.params[name])
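With the 'vanilla averaging' branch above, a decay of step / (step + 1) makes the exponential moving average collapse to a plain running mean. A quick check of that identity in plain Python:

values = [4.0, 8.0, 6.0]
avg = 0.0
for updates_so_far, v in enumerate(values):
  # step/(step + 1) schedule, where step counts updates already applied.
  decay = updates_so_far / float(updates_so_far + 1)
  avg = decay * avg + (1.0 - decay) * v
print(avg, sum(values) / len(values))  # both print 6.0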
Example #38
def generate_checkpoint_state_proto(save_dir,
                                    model_checkpoint_path,
                                    all_model_checkpoint_paths=None):
    """Generates a checkpoint state proto.

  Args:
    save_dir: Directory where the model was saved.
    model_checkpoint_path: The checkpoint file.
    all_model_checkpoint_paths: List of strings.  Paths to all not-yet-deleted
      checkpoints, sorted from oldest to newest.  If this is a non-empty list,
      the last element must be equal to model_checkpoint_path.  These paths
      are also saved in the CheckpointState proto.

  Returns:
    CheckpointState proto with model_checkpoint_path and
    all_model_checkpoint_paths updated to either absolute paths or
    relative paths to the current save_dir.
  """
    if all_model_checkpoint_paths is None:
        all_model_checkpoint_paths = []

    if (not all_model_checkpoint_paths
            or all_model_checkpoint_paths[-1] != model_checkpoint_path):
        logging.info(
            "%s is not in all_model_checkpoint_paths. Manually adding it.",
            model_checkpoint_path)
        all_model_checkpoint_paths.append(model_checkpoint_path)

    # Relative paths need to be rewritten to be relative to the "save_dir"
    # if model_checkpoint_path already contains "save_dir".
    if not os.path.isabs(save_dir):
        if not os.path.isabs(model_checkpoint_path):
            model_checkpoint_path = os.path.relpath(model_checkpoint_path,
                                                    save_dir)
        for i in range(len(all_model_checkpoint_paths)):
            p = all_model_checkpoint_paths[i]
            if not os.path.isabs(p):
                all_model_checkpoint_paths[i] = os.path.relpath(p, save_dir)

    coord_checkpoint_proto = CheckpointState(
        model_checkpoint_path=model_checkpoint_path,
        all_model_checkpoint_paths=all_model_checkpoint_paths)

    return coord_checkpoint_proto
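The relative-path rewriting above leans on os.path.relpath; a quick illustration of what it produces for a checkpoint path inside and outside of save_dir (POSIX-style separators shown):

import os

print(os.path.relpath('train_dir/model.ckpt-1000', 'train_dir'))  # 'model.ckpt-1000'
print(os.path.relpath('other/model.ckpt-1000', 'train_dir'))      # '../other/model.ckpt-1000'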
Example #39
def main(unused_argv=None):
    if FLAGS.debug:
        logging.set_verbosity(logging.DEBUG)
        logging.info('TensorBoard is in debug mode.')

    if not FLAGS.logdir:
        logging.error(
            'A logdir must be specified. Run `tensorboard --help` for '
            'details and examples.')
        return -1

    logging.info('Starting TensorBoard in directory %s', os.getcwd())

    path_to_run = ParseEventFilesFlag(FLAGS.logdir)
    logging.info('TensorBoard path_to_run is: %s', path_to_run)
    multiplexer = event_multiplexer.EventMultiplexer(
        size_guidance=TENSORBOARD_SIZE_GUIDANCE)

    def _Load():
        start = time.time()
        for (path, name) in six.iteritems(path_to_run):
            multiplexer.AddRunsFromDirectory(path, name)
        multiplexer.Reload()
        duration = time.time() - start
        logging.info('Multiplexer done loading. Load took %0.1f secs',
                     duration)
        t = threading.Timer(LOAD_INTERVAL, _Load)
        t.daemon = True
        t.start()

    t = threading.Timer(0, _Load)
    t.daemon = True
    t.start()

    factory = functools.partial(tensorboard_handler.TensorboardHandler,
                                multiplexer)
    try:
        server = ThreadedHTTPServer((FLAGS.host, FLAGS.port), factory)
    except socket.error:
        logging.error(
            'Tried to connect to port %d, but that address is in use.',
            FLAGS.port)
        return -2
    try:
        tag = resource_loader.load_resource('tensorboard/TAG').strip()
        logging.info('TensorBoard is tag: %s', tag)
    except IOError:
        logging.warning('Unable to read TensorBoard tag')
        tag = ''

    status_bar.SetupStatusBarInsideGoogle('TensorBoard %s' % tag, FLAGS.port)
    print('Starting TensorBoard %s on port %d' % (tag, FLAGS.port))
    print('(You can navigate to http://%s:%d)' % (FLAGS.host, FLAGS.port))
    server.serve_forever()
Example #40
def generate_checkpoint_state_proto(save_dir,
                                    model_checkpoint_path,
                                    all_model_checkpoint_paths=None):
  """Generates a checkpoint state proto.

  Args:
    save_dir: Directory where the model was saved.
    model_checkpoint_path: The checkpoint file.
    all_model_checkpoint_paths: List of strings.  Paths to all not-yet-deleted
      checkpoints, sorted from oldest to newest.  If this is a non-empty list,
      the last element must be equal to model_checkpoint_path.  These paths
      are also saved in the CheckpointState proto.

  Returns:
    CheckpointState proto with model_checkpoint_path and
    all_model_checkpoint_paths updated to either absolute paths or
    relative paths to the current save_dir.
  """
  if all_model_checkpoint_paths is None:
    all_model_checkpoint_paths = []

  if (not all_model_checkpoint_paths or
      all_model_checkpoint_paths[-1] != model_checkpoint_path):
    logging.info(
        "%s is not in all_model_checkpoint_paths. Manually adding it.",
        model_checkpoint_path)
    all_model_checkpoint_paths.append(model_checkpoint_path)

  # Relative paths need to be rewritten to be relative to the "save_dir"
  # if model_checkpoint_path already contains "save_dir".
  if not os.path.isabs(save_dir):
    if not os.path.isabs(model_checkpoint_path):
      model_checkpoint_path = os.path.relpath(model_checkpoint_path, save_dir)
    for i in range(len(all_model_checkpoint_paths)):
      p = all_model_checkpoint_paths[i]
      if not os.path.isabs(p):
        all_model_checkpoint_paths[i] = os.path.relpath(p, save_dir)

  coord_checkpoint_proto = CheckpointState(
      model_checkpoint_path=model_checkpoint_path,
      all_model_checkpoint_paths=all_model_checkpoint_paths)

  return coord_checkpoint_proto
Example #41
  def request_stop(self, ex=None):
    """Request that the threads stop.

    After this is called, calls to `should_stop()` will return `True`.

    Note: If an exception is being passed in, it must be in the context of
    handling the exception (i.e. `try: ... except Exception as ex: ...`) and not
    a newly created one.

    Args:
      ex: Optional `Exception`, or Python `exc_info` tuple as returned by
        `sys.exc_info()`.  If this is the first call to `request_stop()` the
        corresponding exception is recorded and re-raised from `join()`.
    """
    ex = self._filter_exception(ex)
    with self._lock:
      if not self._stop_event.is_set():
        if ex and self._exc_info_to_raise is None:
          if isinstance(ex, tuple):
            logging.info("Error reported to Coordinator: %s",
                         compat.as_str_any(ex[1]))
            self._exc_info_to_raise = ex
          else:
            logging.info("Error reported to Coordinator: %s",
                         compat.as_str_any(ex))
            self._exc_info_to_raise = sys.exc_info()
          # self._exc_info_to_raise should contain a tuple containing exception
          # (type, value, traceback)
          if (len(self._exc_info_to_raise) != 3 or
              not self._exc_info_to_raise[0] or
              not self._exc_info_to_raise[1]):
            # Raise, catch and record the exception here so that error happens
            # where expected.
            try:
              raise ValueError(
                  "ex must be a tuple or sys.exc_info must return the current "
                  "exception: %s"
                  % self._exc_info_to_raise)
            except ValueError:
              # Record this error so it kills the coordinator properly.
              self._exc_info_to_raise = sys.exc_info()

        self._stop_event.set()
Example #42
  def _call_func(self, args, kwargs, check_for_new_variables):
    try:
      vars_at_start = len(ops.get_collection(ops.GraphKeys.VARIABLES))
      trainable_at_start = len(
          ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))

      result = self._func(*args, **kwargs)
      if check_for_new_variables:
        trainable_variables = ops.get_collection(
            ops.GraphKeys.TRAINABLE_VARIABLES)
        # If a variable that we intend to train is created as a side effect
        # of creating a template, then that is almost certainly an error.
        if trainable_at_start != len(trainable_variables):
          raise ValueError("Trainable variable created when calling a template "
                           "after the first time, perhaps you used tf.Variable "
                           "when you meant tf.get_variable: %s" %
                           (trainable_variables[trainable_at_start:],))

        # Non-trainable tracking variables are a legitimate reason why a new
        # variable would be created, but it is a relatively advanced use-case,
        # so log it.
        variables = ops.get_collection(ops.GraphKeys.VARIABLES)
        if vars_at_start != len(variables):
          logging.info("New variables created when calling a template after "
                       "the first time, perhaps you used tf.Variable when you "
                       "meant tf.get_variable: %s",
                       variables[vars_at_start:])
      return result
    except Exception as exc:
      # Reraise the exception, but append the original definition to the
      # trace.
      args = exc.args
      if not args:
        arg0 = ""
      else:
        arg0 = args[0]
      trace = "".join(_skip_common_stack_elements(self._stacktrace,
                                                  traceback.format_stack()))
      arg0 = "%s\n\noriginally defined at:\n%s" % (arg0, trace)
      new_args = [arg0]
      new_args.extend(args[1:])
      exc.args = tuple(new_args)
      raise
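For context, this is the check behind variable-sharing templates. Below is a hedged sketch of the intended usage, assuming the public `tf.make_template` wrapper (the `scale_by_y` function is illustrative): variables created with `tf.get_variable` are reused on later calls, so no new trainable variables should appear after the first call.

import tensorflow as tf

def scale_by_y(x):
  # Created on the first call, reused on every later call.
  y = tf.get_variable('y', shape=[], initializer=tf.constant_initializer(1.0))
  return x * y

scale = tf.make_template('scale_by_y', scale_by_y)
a = scale(tf.constant(3.0))  # creates 'scale_by_y/y'
b = scale(tf.constant(4.0))  # reuses it; no new trainable variable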
Ejemplo n.º 43
0
def main(unused_argv=None):
  if FLAGS.debug:
    logging.set_verbosity(logging.DEBUG)
    logging.info('TensorBoard is in debug mode.')

  if not FLAGS.logdir:
    logging.error('A logdir must be specified. Run `tensorboard --help` for '
                  'details and examples.')
    return -1

  logging.info('Starting TensorBoard in directory %s', os.getcwd())

  path_to_run = ParseEventFilesFlag(FLAGS.logdir)
  logging.info('TensorBoard path_to_run is: %s', path_to_run)
  multiplexer = event_multiplexer.EventMultiplexer(
      size_guidance=TENSORBOARD_SIZE_GUIDANCE)
  # Ensure the Multiplexer initializes in a loaded state before it adds runs,
  # so it can handle HTTP requests while runs are loading.
  multiplexer.Reload()
  def _Load():
    start = time.time()
    for (path, name) in six.iteritems(path_to_run):
      multiplexer.AddRunsFromDirectory(path, name)
    multiplexer.Reload()
    duration = time.time() - start
    logging.info('Multiplexer done loading. Load took %0.1f secs', duration)
    t = threading.Timer(LOAD_INTERVAL, _Load)
    t.daemon = True
    t.start()
  t = threading.Timer(0, _Load)
  t.daemon = True
  t.start()

  factory = functools.partial(tensorboard_handler.TensorboardHandler,
                              multiplexer)
  try:
    server = ThreadedHTTPServer((FLAGS.host, FLAGS.port), factory)
  except socket.error:
    logging.error('Tried to connect to port %d, but that address is in use.',
                  FLAGS.port)
    return -2
  try:
    tag = resource_loader.load_resource('tensorboard/TAG').strip()
    logging.info('TensorBoard is tag: %s', tag)
  except IOError:
    logging.warning('Unable to read TensorBoard tag')
    tag = ''

  status_bar.SetupStatusBarInsideGoogle('TensorBoard %s' % tag, FLAGS.port)
  print('Starting TensorBoard %s on port %d' % (tag, FLAGS.port))
  print('(You can navigate to http://%s:%d)' % (FLAGS.host, FLAGS.port))
  server.serve_forever()
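The background loading above relies on a re-arming `threading.Timer`. Here is a self-contained sketch of that pattern (the interval and the reload body are placeholders, not TensorBoard's actual values):

import threading
import time

LOAD_INTERVAL = 60  # seconds; assumed value for the sketch

def _reload():
  start = time.time()
  # ... reload event data here ...
  print('Reload took %0.1f secs' % (time.time() - start))
  # Re-arm the timer so reloading continues in the background.
  t = threading.Timer(LOAD_INTERVAL, _reload)
  t.daemon = True
  t.start()

# Kick off the first load immediately without blocking the caller.
first = threading.Timer(0, _reload)
first.daemon = True
first.start()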
Ejemplo n.º 44
0
    def request_stop(self, ex=None):
        """Request that the threads stop.

    After this is called, calls to `should_stop()` will return `True`.

    Note: If an exception is being passed in, it must be in the context of
    handling the exception (i.e. `try: ... except Exception as ex: ...`) and not
    a newly created one.

    Args:
      ex: Optional `Exception`, or Python `exc_info` tuple as returned by
        `sys.exc_info()`.  If this is the first call to `request_stop()` the
        corresponding exception is recorded and re-raised from `join()`.
    """
        ex = self._filter_exception(ex)
        with self._lock:
            if not self._stop_event.is_set():
                if ex and self._exc_info_to_raise is None:
                    if isinstance(ex, tuple):
                        logging.info("Error reported to Coordinator: %s",
                                     compat.as_str_any(ex[1]))
                        self._exc_info_to_raise = ex
                    else:
                        logging.info("Error reported to Coordinator: %s",
                                     compat.as_str_any(ex))
                        self._exc_info_to_raise = sys.exc_info()
                    # self._exc_info_to_raise should contain a tuple containing exception
                    # (type, value, traceback)
                    if (len(self._exc_info_to_raise) != 3
                            or not self._exc_info_to_raise[0]
                            or not self._exc_info_to_raise[1]):
                        # Raise, catch and record the exception here so that error happens
                        # where expected.
                        try:
                            raise ValueError(
                                "ex must be a tuple or sys.exc_info must return the current "
                                "exception: %s" % self._exc_info_to_raise)
                        except ValueError:
                            # Record this error so it kills the coordinator properly.
                            self._exc_info_to_raise = sys.exc_info()

                self._stop_event.set()
Ejemplo n.º 45
0
  def MakeGraph(self,
                max_steps=10,
                beam_size=2,
                batch_size=1,
                **kwargs):
    """Constructs a structured learning graph."""
    assert max_steps > 0, 'Empty network not supported.'

    logging.info('MakeGraph + %s', kwargs)

    with self.test_session(graph=tf.Graph()) as sess:
      feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
          gen_parser_ops.feature_size(task_context=self._task_context))
    embedding_dims = [8, 8, 8]
    hidden_layer_sizes = []
    learning_rate = 0.01
    builder = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        seed=1,
        max_steps=max_steps,
        beam_size=beam_size,
        gate_gradients=True,
        use_locking=True,
        use_averaging=False,
        check_parameters=False,
        **kwargs)
    builder.AddTraining(self._task_context,
                        batch_size,
                        learning_rate=learning_rate,
                        decay_steps=1000,
                        momentum=0.9,
                        corpus_name='training-corpus')
    builder.AddEvaluation(self._task_context,
                          batch_size,
                          evaluation_max_steps=25,
                          corpus_name=None)
    builder.training['inits'] = tf.group(*builder.inits.values(), name='inits')
    return builder
Ejemplo n.º 46
0
    def AddRun(self, path, name=None):
        """Add a run to the multiplexer.

    If the name is not specified, it is the same as the path.

    If a run by that name exists, and we are already watching the right path,
      do nothing. If we are watching a different path, replace the event
      accumulator.

    If `AutoUpdate` or `Reload` has been called, it will `AutoUpdate` or
    `Reload` the newly created accumulators. This maintains the invariant that
    once the Multiplexer has been activated, all of its accumulators are active.

    Args:
      path: Path to the event files (or event directory) for given run.
      name: Name of the run to add. If not provided, is set to path.

    Returns:
      The `EventMultiplexer`.
    """
        if name is None or name == '':
            name = path
        accumulator = None
        with self._accumulators_mutex:
            if name not in self._accumulators or self._paths[name] != path:
                if name in self._paths and self._paths[name] != path:
                    # TODO(danmane) - Make it impossible to overwrite an old path with
                    # a new path (just give the new path a distinct name)
                    logging.warning(
                        'Conflict for name %s: old path %s, new path %s', name,
                        self._paths[name], path)
                logging.info('Constructing EventAccumulator for %s', path)
                accumulator = event_accumulator.EventAccumulator(
                    path, self._size_guidance)
                self._accumulators[name] = accumulator
                self._paths[name] = path
        if accumulator:
            if self._reload_called:
                accumulator.Reload()
            if self._autoupdate_called:
                accumulator.AutoUpdate(self._autoupdate_interval)
        return self
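A hedged usage sketch (the log directories and run names are illustrative, and the `event_multiplexer` import and `TENSORBOARD_SIZE_GUIDANCE` constant from the earlier TensorBoard example are assumed to be in scope):

multiplexer = event_multiplexer.EventMultiplexer(
    size_guidance=TENSORBOARD_SIZE_GUIDANCE)
multiplexer.AddRun('/tmp/logs/train', name='train')
multiplexer.AddRun('/tmp/logs/eval')  # name defaults to the path
multiplexer.Reload()  # the newly added accumulators load their event data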
Ejemplo n.º 47
0
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    src = gen_parser_ops.document_source(batch_size=32,
                                         corpus_name=FLAGS.corpus_name,
                                         task_context=FLAGS.task_context)
    sentence = sentence_pb2.Sentence()
    while True:
      documents, finished = sess.run(src)
      logging.info('Read %d documents', len(documents))
      for d in documents:
        sentence.ParseFromString(d)
        tr = asciitree.LeftAligned()
        d = to_dict(sentence)
        print('Input: %s' % sentence.text)
        print('Parse:')
        print(tr(d))

      if finished:
        break
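`asciitree.LeftAligned` renders a nested dict that maps each node label to a dict of its children; `to_dict` is assumed to build that shape from the parsed sentence. A small illustration with a made-up parse:

import asciitree

tr = asciitree.LeftAligned()
tree = {
    'saw VBD ROOT': {
        'John NNP nsubj': {},
        'dog NN dobj': {
            'the DT det': {},
        },
    },
}
print(tr(tree))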
Ejemplo n.º 48
0
def main(unused_argv):
    logging.set_verbosity(logging.INFO)
    with tf.Session() as sess:
        src = gen_parser_ops.document_source(batch_size=32,
                                             corpus_name=FLAGS.corpus_name,
                                             task_context=FLAGS.task_context)
        sentence = sentence_pb2.Sentence()
        while True:
            documents, finished = sess.run(src)
            logging.info('Read %d documents', len(documents))
            for d in documents:
                sentence.ParseFromString(d)
                tr = asciitree.LeftAligned()
                d = to_dict(sentence)
                print('Input: %s' % sentence.text)
                print('Parse:')
                print(tr(d))

            if finished:
                break
Ejemplo n.º 49
0
  def request_stop(self, ex=None):
    """Request that the threads stop.

    After this is called, calls to should_stop() will return True.

    Args:
      ex: Optional Exception, or Python 'exc_info' tuple as returned by
        sys.exc_info().  If this is the first call to request_stop() the
        corresponding exception is recorded and re-raised from join().
    """
    with self._lock:
      if not self._stop_event.is_set():
        if ex and self._exc_info_to_raise is None:
          if isinstance(ex, tuple):
            logging.info("Error reported to Coordinator: %s", str(ex[1]))
            self._exc_info_to_raise = ex
          else:
            logging.info("Error reported to Coordinator: %s", str(ex))
            self._exc_info_to_raise = sys.exc_info()
        self._stop_event.set()
Ejemplo n.º 50
0
  def _get_first_op_from_collection(self, key):
    """Returns the first `Operation` from a collection.

    Args:
      key: A string collection key.

    Returns:
      The first Op found in a collection, or `None` if the collection is empty.
    """
    try:
      op_list = ops.get_collection(key)
      if len(op_list) > 1:
        logging.info("Found %d %s operations. Returning the first one.",
                     len(op_list), key)
      if op_list:
        return op_list[0]
    except LookupError:
      pass

    return None
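A standalone sketch of the collection lookup this helper performs (the collection key 'my_ops' and the tensors added to it are placeholders):

import tensorflow as tf

a = tf.constant(1, name='first_op')
b = tf.constant(2, name='second_op')
tf.add_to_collection('my_ops', a)
tf.add_to_collection('my_ops', b)

op_list = tf.get_collection('my_ops')
first = op_list[0] if op_list else None  # what _get_first_op_from_collection returns
print(first.name)  # -> 'first_op:0'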
Ejemplo n.º 53
0
def l1_regularizer(scale):
    """Returns a function that can be used to apply L1 regularization to weights.

  L1 regularization encourages sparsity.

  Args:
    scale: A scalar multiplier `Tensor`. 0.0 disables the regularizer.

  Returns:
    A function with signature `l1(weights, name=None)` that applies L1
    regularization.

  Raises:
    ValueError: If scale is outside of the range [0.0, 1.0] or if scale is not a
    float.
  """
    if isinstance(scale, numbers.Integral):
        raise ValueError('scale cannot be an integer: %s' % scale)
    if isinstance(scale, numbers.Real):
        if scale < 0.:
            raise ValueError(
                'Setting a scale less than 0 on a regularizer: %g' % scale)
        if scale >= 1.:
            raise ValueError(
                'Setting a scale greater than 1 on a regularizer: %g' % scale)
        if scale == 0.:
            logging.info('Scale of 0 disables regularizer.')
            return lambda _, name=None: None

    def l1(weights, name=None):
        """Applies L1 regularization to weights."""
        with ops.op_scope([weights], name, 'l1_regularizer') as scope:
            my_scale = ops.convert_to_tensor(scale,
                                             dtype=weights.dtype.base_dtype,
                                             name='scale')
            return standard_ops.mul(my_scale,
                                    standard_ops.reduce_sum(
                                        standard_ops.abs(weights)),
                                    name=scope)

    return l1
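A hedged usage sketch of the returned function (the variable shape and scale are illustrative; it assumes the `l1_regularizer` defined above and the TensorFlow version it targets):

import tensorflow as tf

weights = tf.get_variable('weights', shape=[10, 10],
                          initializer=tf.truncated_normal_initializer())
l1 = l1_regularizer(scale=0.01)
penalty = l1(weights)  # scalar Tensor: 0.01 * sum(|weights|)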
Ejemplo n.º 54
0
    def testParsingReaderOpWhileLoop(self):
        feature_size = 3
        batch_size = 5

        def ParserEndpoints():
            return gen_parser_ops.gold_parse_reader(
                self._task_context,
                feature_size,
                batch_size,
                corpus_name='training-corpus')

        with self.test_session() as sess:
            # The 'condition' and 'body' functions expect as many arguments as there
            # are loop variables. 'condition' depends on the 'epoch' loop variable
            # only, so we disregard the remaining unused function arguments. 'body'
            # returns a list of updated loop variables.
            def Condition(epoch, *unused_args):
                return tf.less(epoch, 2)

            def Body(epoch, num_actions, *feature_args):
                # By adding one of the outputs of the reader op ('epoch') as a control
                # dependency to the reader op we force the repeated evaluation of the
                # reader op.
                with epoch.graph.control_dependencies([epoch]):
                    features, epoch, gold_actions = ParserEndpoints()
                num_actions = tf.maximum(
                    num_actions,
                    tf.reduce_max(gold_actions, [0], False) + 1)
                feature_ids = []
                for i in range(len(feature_args)):
                    feature_ids.append(features[i])
                return [epoch, num_actions] + feature_ids

            epoch = ParserEndpoints()[-2]
            num_actions = tf.constant(0)
            loop_vars = [epoch, num_actions]

            res = sess.run(
                cf.While(Condition, Body, loop_vars, parallel_iterations=1))
            logging.info('Result: %s', res)
            self.assertEqual(res[0], 2)
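A minimal sketch of the same loop structure using the public `tf.while_loop` API (the counter computation is illustrative, not the parser reader): the condition inspects only the first loop variable, and the body returns the updated list of loop variables.

import tensorflow as tf

def condition(epoch, total):
  return tf.less(epoch, 2)

def body(epoch, total):
  return [epoch + 1, total + epoch]

with tf.Session() as sess:
  result = sess.run(tf.while_loop(condition, body,
                                  [tf.constant(0), tf.constant(0)],
                                  parallel_iterations=1))
  print(result)  # [2, 1]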