def __init__(self,
               save_steps=100,
               output_dir=None,
               summary_writer=None,
               scaffold=None,
               summary_op=None):
    """Initializes a `SummarySaver` monitor.

    Args:
      save_steps: `int`, save summaries every N steps. See `EveryN`.
      output_dir: `string`, the directory to save the summaries to. Only used
          if no `summary_writer` is supplied.
      summary_writer: `SummaryWriter`. If `None` and an `output_dir` was passed,
          one will be created accordingly.
      scaffold: `Scaffold` to get summary_op if it's not provided.
      summary_op: `Tensor` of type `string`. A serialized `Summary` protocol
          buffer, as output by TF summary methods like `scalar_summary` or
          `merge_all_summaries`.
    """
    # TODO(ipolosukhin): Implement every N seconds.
    self._summary_op = summary_op
    self._summary_writer = summary_writer
    if summary_writer is None and output_dir:
      self._summary_writer = SummaryWriterCache.get(output_dir)
    self._scaffold = scaffold
    self._save_steps = save_steps
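A minimal usage sketch, assuming the enclosing class is the `SummarySaver` monitor named in the docstring and that a merged summary op is available in the graph:

# Illustrative sketch only; the class name and output directory are assumptions.
summary_op = tf.summary.merge_all()  # or let a Scaffold supply the op
monitor = SummarySaver(save_steps=100,
                       output_dir="/tmp/train_logs",
                       summary_op=summary_op)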
  def __init__(self,
               checkpoint_dir,
               save_secs=None,
               save_steps=None,
               saver=None,
               checkpoint_basename="model.ckpt",
               scaffold=None):
    """Initialize CheckpointSaverHook monitor.

    Args:
      checkpoint_dir: `str`, base directory for the checkpoint files.
      save_secs: `int`, save every N secs.
      save_steps: `int`, save every N steps.
      saver: `Saver` object, used for saving.
      checkpoint_basename: `str`, base name for the checkpoint files.
      scaffold: `Scaffold`, used to get the saver object.

    Raises:
      ValueError: Exactly one of `save_steps` or `save_secs` should be set.
    """
    logging.info("Create CheckpointSaverHook.")
    self._saver = saver
    self._checkpoint_dir = checkpoint_dir
    self._summary_writer = SummaryWriterCache.get(checkpoint_dir)
    self._save_path = os.path.join(checkpoint_dir, checkpoint_basename)
    self._scaffold = scaffold
    self._save_secs = save_secs
    self._save_steps = save_steps
    self._last_saved_time = None
    self._last_saved_step = None

    if save_steps is None and save_secs is None:
      raise ValueError("Either save_steps or save_secs should be provided")
    if (save_steps is not None) and (save_secs is not None):
      raise ValueError("Can not provide both save_steps and save_secs.")
Example #3
    def begin(self):
        # These calls only work because the SessionRunHook API guarantees this
        # will get called within a graph context containing our model graph.

        self.summary_writer = SummaryWriterCache.get(self.working_dir)
        self.weight_tensors = tf.trainable_variables()
        self.global_step = tf.train.get_or_create_global_step()
  def __init__(self,
               save_steps=100,
               save_secs=None,
               output_dir=None,
               summary_writer=None,
               scaffold=None,
               summary_op=None):
    """Initializes a `SummarySaver` monitor.

    Args:
      save_steps: `int`, save summaries every N steps. Exactly one of
          `save_secs` and `save_steps` should be set.
      save_secs: `int`, save summaries every N seconds.
      output_dir: `string`, the directory to save the summaries to. Only used
          if no `summary_writer` is supplied.
      summary_writer: `SummaryWriter`. If `None` and an `output_dir` was passed,
          one will be created accordingly.
      scaffold: `Scaffold` to get summary_op if it's not provided.
      summary_op: `Tensor` of type `string`. A serialized `Summary` protocol
          buffer, as output by TF summary methods like `scalar_summary` or
          `merge_all_summaries`.
    """
    self._summary_op = summary_op
    self._summary_writer = summary_writer
    if summary_writer is None and output_dir:
      self._summary_writer = SummaryWriterCache.get(output_dir)
    self._scaffold = scaffold
    self._timer = _SecondOrStepTimer(every_secs=save_secs,
                                     every_steps=save_steps)
  def __init__(self,
               save_steps=None,
               save_secs=None,
               output_dir="",
               show_dataflow=True,
               show_memory=False):
    """Initializes a hook that takes periodic profiling snapshots.

    `options.run_metadata` argument of `tf.Session.run` is used to collect
    metadata about execution. This hook sets the metadata and dumps it in Chrome
    Trace format.

    Args:
      save_steps: `int`, save profile traces every N steps. Exactly one of
          `save_secs` and `save_steps` should be set.
      save_secs: `int` or `float`, save profile traces every N seconds.
      output_dir: `string`, the directory to save the profile traces to.
          Defaults to the current directory.
      show_dataflow: `bool`, if True, add flow events to the trace connecting
          producers and consumers of tensors.
      show_memory: `bool`, if True, add object snapshot events to the trace
          showing the sizes and lifetimes of tensors.
    """
    self._output_file = os.path.join(output_dir, "timeline-{}.json")
    self._file_writer = SummaryWriterCache.get(output_dir)
    self._show_dataflow = show_dataflow
    self._show_memory = show_memory
    self._timer = SecondOrStepTimer(
        every_secs=save_secs, every_steps=save_steps)
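A hedged sketch of wiring this profiling hook into a monitored session; the class name `ProfilerHook` and `train_op` are assumptions based on the docstring, not taken from the example:

# Illustrative sketch only: dump a Chrome Trace file every 500 steps.
profiler_hook = ProfilerHook(save_steps=500,
                             output_dir="/tmp/profiles",
                             show_dataflow=True,
                             show_memory=False)
with tf.train.MonitoredTrainingSession(hooks=[profiler_hook]) as sess:
    while not sess.should_stop():
        sess.run(train_op)  # assumed training op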
  def __init__(self,
               checkpoint_dir,
               save_secs=None,
               save_steps=None,
               saver=None,
               checkpoint_basename="model.ckpt",
               scaffold=None):
    """Initialize CheckpointSaverHook monitor.

    Args:
      checkpoint_dir: `str`, base directory for the checkpoint files.
      save_secs: `int`, save every N secs.
      save_steps: `int`, save every N steps.
      saver: `Saver` object, used for saving.
      checkpoint_basename: `str`, base name for the checkpoint files.
      scaffold: `Scaffold`, used to get the saver object.

    Raises:
      ValueError: Exactly one of `save_steps` or `save_secs` should be set.
      ValueError: Exactly one of saver or scaffold should be set.
    """
    logging.info("Create CheckpointSaverHook.")
    if ((saver is None and scaffold is None) or
        (saver is not None and scaffold is not None)):
      raise ValueError("Exactly one of saver or scaffold must be provided.")
    self._saver = saver
    self._checkpoint_dir = checkpoint_dir
    self._summary_writer = SummaryWriterCache.get(checkpoint_dir)
    self._save_path = os.path.join(checkpoint_dir, checkpoint_basename)
    self._scaffold = scaffold
    self._timer = _SecondOrStepTimer(every_secs=save_secs,
                                     every_steps=save_steps)
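Because exactly one of `saver` and `scaffold` may be passed, the hook can also be driven from a `Scaffold`, which supplies a default saver. A hedged sketch (paths are placeholders):

# Illustrative sketch only: checkpoint every 10 minutes via a Scaffold-provided saver.
scaffold = tf.train.Scaffold()
ckpt_hook = CheckpointSaverHook(checkpoint_dir="/tmp/ckpts",
                                save_secs=600,
                                scaffold=scaffold)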
Example #7
  def after_run(self, run_context, run_values):
    del run_context  # Unused by feature importance summary saver hook.

    # Read result tensors.
    global_step = run_values.results["global_step"]
    feature_names = run_values.results["feature_names"]
    feature_usage_counts = run_values.results["feature_usage_counts"]
    feature_gains = run_values.results["feature_gains"]

    # Ensure summaries are logged at desired frequency
    if (self._last_triggered_step is not None and
        global_step < self._last_triggered_step + self._every_n_steps):
      return

    # Validate tensors.
    if (len(feature_names) != len(feature_usage_counts) or
        len(feature_names) != len(feature_gains)):
      raise RuntimeError(
          "Feature names and importance measures have inconsistent lengths.")

    # Compute total usage.
    total_usage_count = 0.0
    for usage_count in feature_usage_counts:
      total_usage_count += usage_count
    usage_count_norm = 1.0 / total_usage_count if total_usage_count else 1.0

    # Compute total gain.
    total_gain = 0.0
    for gain in feature_gains:
      total_gain += gain
    gain_norm = 1.0 / total_gain if total_gain else 1.0

    # Output summary for each feature.
    self._last_triggered_step = global_step
    for (name, usage_count, gain) in zip(feature_names, feature_usage_counts,
                                         feature_gains):
      output_dir = os.path.join(self._model_dir, name.decode("utf-8"))
      summary_writer = SummaryWriterCache.get(output_dir)
      usage_count_summary = Summary(value=[
          Summary.Value(
              tag="feature_importance/usage_counts",
              simple_value=usage_count)
      ])
      usage_fraction_summary = Summary(value=[
          Summary.Value(
              tag="feature_importance/usage_fraction",
              simple_value=usage_count * usage_count_norm)
      ])
      summary_writer.add_summary(usage_count_summary, global_step)
      summary_writer.add_summary(usage_fraction_summary, global_step)
      gains_summary = Summary(
          value=[Summary.Value(
              tag="feature_importance/gains",
              simple_value=gain)])
      gains_fraction_summary = Summary(
          value=[Summary.Value(
              tag="feature_importance/gains_fraction",
              simple_value=gain * gain_norm)])
      summary_writer.add_summary(gains_summary, global_step)
      summary_writer.add_summary(gains_fraction_summary, global_step)
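A small worked example of the normalization above, with made-up numbers: usage counts [3, 1] produce usage fractions 0.75 and 0.25, and gains [2.0, 0.0] produce gain fractions 1.0 and 0.0. A minimal sketch of the same arithmetic:

# Illustrative sketch only: reproduces the usage_count_norm arithmetic from
# after_run with hypothetical values.
feature_usage_counts = [3.0, 1.0]
total_usage_count = sum(feature_usage_counts)
usage_count_norm = 1.0 / total_usage_count if total_usage_count else 1.0
usage_fractions = [c * usage_count_norm for c in feature_usage_counts]  # [0.75, 0.25]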
Example #8
    def begin(self):
        if self._output_dir:
            self._summary_writer = SummaryWriterCache.get(self._output_dir)

        self._next_step = None
        self._global_step = tf.train.get_global_step()
        if self._global_step is None:
            raise RuntimeError('Global step must be created for VarVisHook.')
 def begin(self):
   if self._summary_writer is None and self._output_dir:
     self._summary_writer = SummaryWriterCache.get(self._output_dir)
   self._next_step = None
   self._global_step_tensor = training_util.get_global_step()
   if self._global_step_tensor is None:
     raise RuntimeError(
         "Global step should be created to use SummarySaverHook.")
Example #10
 def begin(self):
     if self._summary_writer is None and self._output_dir:
         self._summary_writer = SummaryWriterCache.get(self._output_dir)
     self._next_episode = None
     self._current_episode = None
     self._global_episode_tensor = get_global_episode()
     if self._global_episode_tensor is None:
         raise RuntimeError("Global episode should be created to use EpisodeSummarySaverHook.")
 def begin(self):
   if self._summary_writer is None and self._output_dir:
     self._summary_writer = SummaryWriterCache.get(self._output_dir)
   self._global_step_tensor = training_util.get_global_step()
   if self._global_step_tensor is None:
     raise RuntimeError(
         "Global step should be created to use StepCounterHook.")
   self._summary_tag = self._global_step_tensor.op.name + "/sec"
 def begin(self):
   if self._summary_writer is None and self._output_dir:
     self._summary_writer = SummaryWriterCache.get(self._output_dir)
   self._next_step = None
   self._global_step_tensor = training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
   if self._global_step_tensor is None:
     raise RuntimeError(
         "Global step should be created to use SummarySaverHook.")
Example #13
 def begin(self):
   self._summary_writer = SummaryWriterCache.get(self._checkpoint_dir)
   self._global_step_tensor = training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
   if self._global_step_tensor is None:
     raise RuntimeError(
         "Global step should be created to use CheckpointSaverHook.")
   for l in self._listeners:
     l.begin()
 def begin(self):
   self._summary_writer = SummaryWriterCache.get(self._checkpoint_dir)
   self._global_step_tensor = training_util.get_global_step()
   if self._global_step_tensor is None:
     raise RuntimeError(
         "Global step should be created to use CheckpointSaverHook.")
   for l in self._listeners:
     l.begin()
Example #15
 def begin(self):
     if self._summary_writer is None and self._output_dir:
         self._summary_writer = SummaryWriterCache.get(self._output_dir)
     self._global_episode_tensor = get_global_episode()
     if self._global_episode_tensor is None:
         raise RuntimeError("Global step should be created to use EpisodeCounterHook.")
     self._summary_sec_tag = self._global_episode_tensor.op.name + "/sec"
     self._summary_steps_tag = self._global_episode_tensor.op.name + "/steps"
     self._num_steps = 0
  def __init__(self,
               every_n_steps=100,
               every_n_secs=None,
               output_dir=None,
               summary_writer=None):

    if (every_n_steps is None) == (every_n_secs is None):
      raise ValueError(
          "exactly one of every_n_steps and every_n_secs should be provided.")
    self._timer = _SecondOrStepTimer(every_steps=every_n_steps,
                                     every_secs=every_n_secs)

    self._summary_writer = summary_writer
    if summary_writer is None and output_dir:
      self._summary_writer = SummaryWriterCache.get(output_dir)
  def __init__(self,
               checkpoint_dir,
               save_secs=None,
               save_steps=None,
               saver=None,
               checkpoint_basename="model.ckpt",
               scaffold=None,
               listeners=None):
    """Initialize CheckpointSaverHook monitor.

    Args:
      checkpoint_dir: `str`, base directory for the checkpoint files.
      save_secs: `int`, save every N secs.
      save_steps: `int`, save every N steps.
      saver: `Saver` object, used for saving.
      checkpoint_basename: `str`, base name for the checkpoint files.
      scaffold: `Scaffold`, used to get the saver object.
      listeners: List of `CheckpointSaverListener` subclass instances.
        Used for callbacks that run immediately after the corresponding
        CheckpointSaverHook callbacks, only in steps where the
        CheckpointSaverHook was triggered.

    Raises:
      ValueError: Exactly one of `save_steps` or `save_secs` should be set.
      ValueError: Exactly one of saver or scaffold should be set.
    """
    logging.info("Create CheckpointSaverHook.")
    if ((saver is None and scaffold is None) or
        (saver is not None and scaffold is not None)):
      raise ValueError("Exactly one of saver or scaffold must be provided.")
    self._saver = saver
    self._checkpoint_dir = checkpoint_dir
    self._summary_writer = SummaryWriterCache.get(checkpoint_dir)
    self._save_path = os.path.join(checkpoint_dir, checkpoint_basename)
    self._scaffold = scaffold
    self._timer = _SecondOrStepTimer(every_secs=save_secs,
                                     every_steps=save_steps)
    self._listeners = listeners or []
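A hedged sketch of the listener mechanism described above: `tf.train.CheckpointSaverListener` exposes `begin`, `before_save`, `after_save` and `end` callbacks, and instances are passed via `listeners`. The listener body and paths below are placeholders:

# Illustrative sketch only.
class LoggingListener(tf.train.CheckpointSaverListener):
    def after_save(self, session, global_step_value):
        # Runs right after the hook writes a checkpoint at this step.
        print("Checkpoint written at step {}".format(global_step_value))

ckpt_hook = CheckpointSaverHook(checkpoint_dir="/tmp/ckpts",
                                save_steps=1000,
                                saver=tf.train.Saver(),
                                listeners=[LoggingListener()])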
  def __init__(self,
               save_steps=None,
               save_secs=None,
               output_dir=None,
               summary_writer=None,
               scaffold=None,
               summary_op=None):
    """Initializes a `SummarySaver` monitor.

    Args:
      save_steps: `int`, save summaries every N steps. Exactly one of
          `save_secs` and `save_steps` should be set.
      save_secs: `int`, save summaries every N seconds.
      output_dir: `string`, the directory to save the summaries to. Only used
          if no `summary_writer` is supplied.
      summary_writer: `SummaryWriter`. If `None` and an `output_dir` was passed,
          one will be created accordingly.
      scaffold: `Scaffold` to get summary_op if it's not provided.
      summary_op: `Tensor` of type `string` containing the serialized `Summary`
          protocol buffer, or a list of such tensors. These are typically
          produced by TF summary methods like `tf.summary.scalar` or
          `tf.summary.merge_all`. A single tensor may be passed directly; more
          than one must be passed as a list.

    Raises:
      ValueError: Exactly one of scaffold or summary_op should be set.
    """
    if ((scaffold is None and summary_op is None) or
        (scaffold is not None and summary_op is not None)):
      raise ValueError(
          "Exactly one of scaffold or summary_op must be provided.")
    self._summary_op = summary_op
    self._summary_writer = summary_writer
    if summary_writer is None and output_dir:
      self._summary_writer = SummaryWriterCache.get(output_dir)
    self._scaffold = scaffold
    self._timer = _SecondOrStepTimer(every_secs=save_secs,
                                     every_steps=save_steps)
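A hedged sketch of the scaffold/summary_op exclusivity described above, assuming this constructor belongs to a `SummarySaverHook`-style class; only one of the two hooks below would be used in practice:

# Illustrative sketch only: two mutually exclusive ways to supply the summaries.
hook_from_op = SummarySaverHook(save_steps=100,
                                output_dir="/tmp/logs",
                                summary_op=tf.summary.merge_all())
hook_from_scaffold = SummarySaverHook(save_secs=60,
                                      output_dir="/tmp/logs",
                                      scaffold=tf.train.Scaffold())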
Example #19
 stepsSaver = StepsSaver(FLAGS.our_log_dir)
 reward_vector2scalar = FuncReward(gamma)
 # Configure sess
 config = tf.ConfigProto()
 config.gpu_options.allow_growth = True
 with __agent.create_session(
         config=config, save_dir=FLAGS.tf_log_dir,
         save_checkpoint_secs=FLAGS.save_checkpoint_secs) as sess, \
     AsynchronousAgent(
         agent=__agent, method='rate', rate=update_rate) as _agent:
     agent = SkippingAgent(
         # n_skip_vec=(2, 6, 6),
         agent=_agent,
         n_skip=n_skip,
         specific_act=noop)
     summary_writer = SummaryWriterCache.get(FLAGS.tf_log_dir)
     # set vars
     sess.run(op_set_lr, feed_dict={lr_in: learning_rate})
     print "Using learning rate {}".format(sess.run(lr))
     n_ep = 0
     n_total_steps = 0
     # GoGoGo
     for _ in range(1000):
         cum_reward = 0.0
         n_ep_steps = 0
         state = env.reset()
         while True:
             action = agent.act(state)
             if action != 3:
                 print_qvals(n_ep_steps, __agent, state, action,
                             AGENT_ACTIONS)
 stepsSaver = StepsSaver(our_log_dir)
 reward_vector2scalar = FuncReward(gamma)
 # Configure sess
 config = tf.ConfigProto()
 config.gpu_options.per_process_gpu_memory_fraction = gpu_mem_fraction
 with __agent.create_session(
         config=config, save_dir=tf_log_dir,
         save_checkpoint_secs=save_checkpoint_secs) as sess, \
     AsynchronousAgent(
         agent=__agent, method='ratio', ratio=update_ratio) as _agent:
     agent = SkippingAgent(
         # n_skip_vec=(2, 6, 6),
         agent=_agent,
         n_skip=n_skip,
         specific_act=noop)
     summary_writer = SummaryWriterCache.get(tf_log_dir)
     # set vars
     sess.run(op_set_lr, feed_dict={lr_in: learning_rate})
     print "Using learning rate {}".format(sess.run(lr))
     n_ep = 0
     n_total_steps = start_step
     # GoGoGo
     while n_total_steps <= 2.5e5:
         cum_reward = 0.0
         n_ep_steps = 0
         state = env.reset()
         while True:
             action = agent.act(state)
             if action != 3:
                 print_qvals(n_ep_steps, __agent, state, action,
                             AGENT_ACTIONS)
Example #21
else:
    for var in tf.global_variables():
        print "var_name: ", var.name
        if 'Adam' in var.name or 'optimizers/beta1_power' in var.name \
                or 'optimizers/beta2_power' in var.name\
                or 'q_logits' in var.name\
                or var.name == 'global_step:0':
            pass
        else:
            restore_var_list.append(var)


try:
    with agent.create_session(config=config, save_dir=FLAGS.logdir, save_checkpoint_secs=3600,
                              restore_var_list=restore_var_list) as sess:
        summary_writer = SummaryWriterCache.get(FLAGS.logdir)
        all_vars = tf.global_variables()
        with open(FLAGS.logdir+"/readme.txt", "w") as f:
            f.write("readme: {}\n".format(FLAGS.readme))
            f.write("logdir: {}\n".format(FLAGS.logdir))
            f.write("savedir: {}\n".format(FLAGS.savedir))
            f.write("restore var names: \n")
            for var_name in restore_var_list:
                f.write("{}\n".format(var_name))
            f.write("gpu_fraction: {}\n".format(FLAGS.gpu_fraction))
            f.write("discount_factor: {}\n".format(FLAGS.discount_factor))
            f.write("batch_size: {}\n".format(FLAGS.batch_size))
            f.write("ac learning rate: {}\n".format(FLAGS.lr))
            f.write("is_learn_q: {} \n".format(FLAGS.is_learn_q))
            f.write("is_fine_tune: {} \n".format(FLAGS.is_fine_tune))
Example #22
def main_train(args, files, tf_config):
    assert args.logdir != '', 'logdir cannot be empty'
    logdir = os.path.join(args.logdir, 'tf_output')

    if os.path.isdir(logdir):
        do_not_delete = True
        if args.ngpus > 1:
            if hvd.rank() == 0:
                if args.force_continue:
                    do_not_delete = True
                else:
                    do_not_delete = False
            else:
                do_not_delete = True

        elif HEADLESS:
            if args.force_continue:
                do_not_delete = True
            else:
                raise ValueError('{} exists'.format(logdir))
        else:
            while True:
                try:
                    key = input(
                        '{} \n do you want to continue?'.format(logdir))
                except NameError:
                    key = 'y'
                if key == 'y':
                    break
                elif key == 'n':
                    do_not_delete = False
                    break
                else:
                    print('invalid key')

        if not do_not_delete:
            print('******* Deleting {} *******'.format(logdir))
            os.system('rm -r {}'.format(logdir))
        else:
            print('continuing')
    elif args.ngpus == 1 or hvd.rank() == 0:
        os.makedirs(logdir)

    print('logdir is {}'.format(logdir))

    tf_output, pc_reader = build_tf_ops(
        args=args,
        data_dict=None,
        files=files,
    )

    train_op, summary_op, tf_data_dict, logger_dict, tf_step = tf_output

    summary_hook = tf.train.SummarySaverHook(
        summary_op=summary_op,
        output_dir=logdir,
        save_steps=args.save_steps,
    )

    logging_hook = tf.train.LoggingTensorHook(
        tensors=logger_dict,
        every_n_iter=args.log_steps,
    )

    hooks = []
    if args.ngpus > 1:
        hooks.append(hvd.BroadcastGlobalVariablesHook(0))
        if hvd.rank() == 0:
            checkpoint_dir = logdir
            save_checkpoint_secs = 300
            hooks += [logging_hook, summary_hook]
        else:
            checkpoint_dir = None
            save_checkpoint_secs = 0
            hooks += [logging_hook]
    else:
        hooks = [logging_hook, summary_hook]
        checkpoint_dir = logdir
        save_checkpoint_secs = 300

    if args.init_checkpoint_folder != '':
        checkpoint = tf.train.latest_checkpoint(args.init_checkpoint_folder)
        tf.train.init_from_checkpoint(checkpoint, {'/': '/'})

    yaml_path = os.path.join(args.logdir, 'args.yaml')

    with open(yaml_path, 'w') as yaml_file:
        yaml.dump(args, yaml_file)

    with tf.train.MonitoredTrainingSession(
            checkpoint_dir=checkpoint_dir,
            hooks=hooks,
            save_summaries_secs=0,
            save_checkpoint_secs=save_checkpoint_secs,
            config=tf_config,
    ) as mon_sess:
        start_time = time.time()
        print(time.time() - start_time)
        writer = SummaryWriterCache.get(logdir)

        while not mon_sess.should_stop():
            # print('hvd rank = {}, current_index = {}, nfiles = {}'.format(current_index, hvd.rank(), len(my_files)))
            tensor_list = [tf_step, tf_data_dict]
            if args.training_splits == 'train':
                tensor_list += [train_op] + tensor_list
            # _, step, data_dict = mon_sess.run(tensor_list)
            mon_sess.run(tensor_list)
Example #23
def train(fps, args):
    with tf.name_scope('loader'):
        x, cond_text, _ = loader.get_batch(fps,
                                           args.train_batch_size,
                                           _WINDOW_LEN,
                                           args.data_first_window,
                                           conditionals=True,
                                           name='batch')
        wrong_audio = loader.get_batch(fps,
                                       args.train_batch_size,
                                       _WINDOW_LEN,
                                       args.data_first_window,
                                       conditionals=False,
                                       name='wrong_batch')
    # wrong_cond_text, wrong_cond_text_embed = loader.get_batch(fps, args.train_batch_size, _WINDOW_LEN, args.data_first_window, wavs=False, conditionals=True, name='batch')

    # Make z vector
    z = tf.random_normal([args.train_batch_size, _D_Z])

    embed = hub.Module('https://tfhub.dev/google/elmo/2',
                       trainable=False,
                       name='embed')
    cond_text_embed = embed(cond_text)

    # Add conditioning input to the model
    args.wavegan_g_kwargs['context_embedding'] = cond_text_embed
    args.wavegan_d_kwargs['context_embedding'] = args.wavegan_g_kwargs[
        'context_embedding']

    lod = tf.placeholder(tf.float32, shape=[])

    with tf.variable_scope('G'):
        # Make generator
        G_z, c_kl_loss = WaveGANGenerator(z,
                                          lod,
                                          train=True,
                                          **args.wavegan_g_kwargs)
        if args.wavegan_genr_pp:
            with tf.variable_scope('pp_filt'):
                G_z = tf.layers.conv1d(G_z,
                                       1,
                                       args.wavegan_genr_pp_len,
                                       use_bias=False,
                                       padding='same')

    # Summarize
    G_z_rms = tf.sqrt(tf.reduce_mean(tf.square(G_z[:, :, 0]), axis=1))
    x_rms = tf.sqrt(tf.reduce_mean(tf.square(x[:, :, 0]), axis=1))
    x_rms_lod_4 = tf.sqrt(
        tf.reduce_mean(tf.square(avg_downsample(x)[:, :, 0]), axis=1))
    x_rms_lod_3 = tf.sqrt(
        tf.reduce_mean(tf.square(avg_downsample(avg_downsample(x))[:, :, 0]),
                       axis=1))
    x_rms_lod_2 = tf.sqrt(
        tf.reduce_mean(tf.square(
            avg_downsample(avg_downsample(avg_downsample(x)))[:, :, 0]),
                       axis=1))
    x_rms_lod_1 = tf.sqrt(
        tf.reduce_mean(tf.square(
            avg_downsample(avg_downsample(avg_downsample(
                avg_downsample(x))))[:, :, 0]),
                       axis=1))
    x_rms_lod_0 = tf.sqrt(
        tf.reduce_mean(tf.square(
            avg_downsample(
                avg_downsample(
                    avg_downsample(avg_downsample(avg_downsample(x)))))[:, :,
                                                                        0]),
                       axis=1))
    tf.summary.histogram('x_rms_batch', x_rms)
    tf.summary.histogram('G_z_rms_batch', G_z_rms)
    tf.summary.scalar('x_rms', tf.reduce_mean(x_rms))
    tf.summary.scalar('x_rms_lod_4', tf.reduce_mean(x_rms_lod_4))
    tf.summary.scalar('x_rms_lod_3', tf.reduce_mean(x_rms_lod_3))
    tf.summary.scalar('x_rms_lod_2', tf.reduce_mean(x_rms_lod_2))
    tf.summary.scalar('x_rms_lod_1', tf.reduce_mean(x_rms_lod_1))
    tf.summary.scalar('x_rms_lod_0', tf.reduce_mean(x_rms_lod_0))
    tf.summary.scalar('G_z_rms', tf.reduce_mean(G_z_rms))
    tf.summary.audio('x', x, _FS, max_outputs=10)
    tf.summary.audio('G_z', G_z, _FS, max_outputs=10)
    tf.summary.text('Conditioning Text', cond_text[:10])

    # with tf.variable_scope('G'):
    #   # Make history buffer
    #   history_buffer = HistoryBuffer(_WINDOW_LEN, args.train_batch_size * 100, args.train_batch_size)

    #   # Select half of batch from history buffer
    #   g_from_history, r_from_history, embeds_from_history = history_buffer.get_from_history_buffer()
    #   new_fake_batch = tf.concat([G_z[:tf.shape(G_z)[0] - tf.shape(g_from_history)[0]], g_from_history], 0) # Use tf.shape to handle case when g_from_history is empty
    #   new_cond_embeds = tf.concat([cond_text_embed[:tf.shape(cond_text_embed)[0] - tf.shape(embeds_from_history)[0]], embeds_from_history], 0)
    #   new_real_batch = tf.concat([x[:tf.shape(x)[0] - tf.shape(r_from_history)[0]], r_from_history], 0)
    #   with tf.control_dependencies([new_fake_batch, new_real_batch, new_cond_embeds]):
    #     with tf.control_dependencies([history_buffer.add_to_history_buffer(G_z, x, cond_text_embed)]):
    #       G_z = tf.identity(new_fake_batch)
    #       x = tf.identity(new_real_batch)
    #       args.wavegan_g_kwargs['context_embedding'] = tf.identity(new_cond_embeds)
    #       args.wavegan_d_kwargs['context_embedding'] = args.wavegan_g_kwargs['context_embedding']
    #   G_z.set_shape([args.train_batch_size, _WINDOW_LEN, 1])
    #   x.set_shape([args.train_batch_size, _WINDOW_LEN, 1])

    G_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='G')

    # Print G summary
    print('-' * 80)
    print('Generator vars')
    nparams = 0
    for v in G_vars:
        v_shape = v.get_shape().as_list()
        v_n = reduce(lambda x, y: x * y, v_shape)
        nparams += v_n
        print('{} ({}): {}'.format(v.get_shape().as_list(), v_n, v.name))
    print('Total params: {} ({:.2f} MB)'.format(nparams, (float(nparams) * 4) /
                                                (1024 * 1024)))

    # Summarize
    # tf.summary.scalar('history_buffer_size', history_buffer.current_size)
    # tf.summary.scalar('g_from_history_size', tf.shape(g_from_history)[0])
    # tf.summary.scalar('r_from_history_size', tf.shape(r_from_history)[0])
    # tf.summary.scalar('embeds_from_history_size', tf.shape(embeds_from_history)[0])
    # tf.summary.audio('G_z_history', g_from_history, _FS, max_outputs=10)
    # tf.summary.audio('x_history', r_from_history, _FS, max_outputs=10)
    tf.summary.audio('wrong_audio', wrong_audio, _FS, max_outputs=10)
    tf.summary.scalar('Conditional Resample - KL-Loss', c_kl_loss)
    # tf.summary.scalar('embed_error_cosine', tf.reduce_sum(tf.multiply(cond_text_embed, expected_embed)) / (tf.norm(cond_text_embed) * tf.norm(expected_embed)))
    # tf.summary.scalar('embed_error_cosine_wrong', tf.reduce_sum(tf.multiply(wrong_cond_text_embed, expected_embed)) / (tf.norm(wrong_cond_text_embed) * tf.norm(expected_embed)))

    # Make real discriminator
    with tf.name_scope('D_x'), tf.variable_scope('D'):
        D_x = WaveGANDiscriminator(x, lod, **args.wavegan_d_kwargs)
    D_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='D')

    # Print D summary
    print('-' * 80)
    print('Discriminator vars')
    nparams = 0
    for v in D_vars:
        v_shape = v.get_shape().as_list()
        v_n = reduce(lambda x, y: x * y, v_shape)
        nparams += v_n
        print('{} ({}): {}'.format(v.get_shape().as_list(), v_n, v.name))
    print('Total params: {} ({:.2f} MB)'.format(nparams, (float(nparams) * 4) /
                                                (1024 * 1024)))
    print('-' * 80)

    # Make fake / wrong discriminator
    with tf.name_scope('D_G_z'), tf.variable_scope('D', reuse=True):
        D_G_z = WaveGANDiscriminator(G_z, lod, **args.wavegan_d_kwargs)
    with tf.name_scope('D_w'), tf.variable_scope('D', reuse=True):
        D_w = WaveGANDiscriminator(wrong_audio, lod, **args.wavegan_d_kwargs)

    # Create loss
    D_clip_weights = None
    if args.wavegan_loss == 'dcgan':
        fake = tf.zeros([args.train_batch_size, 1], dtype=tf.float32)
        real = tf.ones([args.train_batch_size, 1], dtype=tf.float32)

        # Conditional G Loss
        G_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=D_G_z[0],
                                                    labels=real))
        G_loss += c_kl_loss

        # Unconditional G Loss
        if args.use_extra_uncond_loss:
            G_loss += tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=D_G_z[1],
                                                        labels=real))
            G_loss /= 2

        # Conditional D Losses
        D_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=D_G_z[0],
                                                    labels=fake))
        D_loss_wrong = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=D_w[0],
                                                    labels=fake))
        D_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=D_x[0],
                                                    labels=real))

        # Unconditional D Losses
        if args.use_extra_uncond_loss:
            D_loss_fake_uncond = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=D_G_z[1],
                                                        labels=fake))
            D_loss_wrong_uncond = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=D_w[1],
                                                        labels=real))
            D_loss_real_uncond = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=D_x[1],
                                                        labels=real))

            D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) \
                   + 0.5 * (D_loss_real_uncond + D_loss_wrong_uncond) + D_loss_fake_uncond
            D_loss /= 2
        else:
            D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake)

        # Warmup Conditional Loss
        # D_warmup_loss = D_loss_real + D_loss_wrong
    elif args.wavegan_loss == 'lsgan':
        # Conditional G Loss
        G_loss = tf.reduce_mean((D_G_z[0] - 1.)**2)
        G_loss += c_kl_loss

        # Unconditional G Loss
        if args.use_extra_uncond_loss:
            G_loss += tf.reduce_mean((D_G_z[1] - 1.)**2)
            G_loss /= 2

        # Conditional D Loss
        D_loss_real = tf.reduce_mean((D_x[0] - 1.)**2)
        D_loss_wrong = tf.reduce_mean(D_w[0]**2)
        D_loss_fake = tf.reduce_mean(D_G_z[0]**2)

        # Unconditional D Loss
        if args.use_extra_uncond_loss:
            D_loss_real_uncond = tf.reduce_mean((D_x[1] - 1.)**2)
            D_loss_wrong_uncond = tf.reduce_mean((D_w[1] - 1.)**2)
            D_loss_fake_uncond = tf.reduce_mean(D_G_z[1]**2)

            D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) \
                   + 0.5 * (D_loss_real_uncond + D_loss_wrong_uncond) + D_loss_fake_uncond
            D_loss /= 2
        else:
            D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake)

        # Warmup Conditional Loss
        # D_warmup_loss = D_loss_real + D_loss_wrong
    elif args.wavegan_loss == 'wgan':
        # Conditional G Loss
        G_loss = -tf.reduce_mean(D_G_z[0])
        G_loss += c_kl_loss

        # Unconditional G Loss
        if args.use_extra_uncond_loss:
            G_loss += -tf.reduce_mean(D_G_z[1])
            G_loss /= 2

        # Conditional D Loss
        D_loss_real = -tf.reduce_mean(D_x[0])
        D_loss_wrong = tf.reduce_mean(D_w[0])
        D_loss_fake = tf.reduce_mean(D_G_z[0])

        # Unconditional D Loss
        if args.use_extra_uncond_loss:
            D_loss_real_uncond = -tf.reduce_mean(D_x[1])
            D_loss_wrong_uncond = -tf.reduce_mean(D_w[1])
            D_loss_fake_uncond = tf.reduce_mean(D_G_z[1])

            D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) \
                   + 0.5 * (D_loss_real_uncond + D_loss_wrong_uncond) + D_loss_fake_uncond
            D_loss /= 2
        else:
            D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake)

        # Warmup Conditional Loss
        # D_warmup_loss = D_loss_real + D_loss_wrong

        with tf.name_scope('D_clip_weights'):
            clip_ops = []
            for var in D_vars:
                clip_bounds = [-.01, .01]
                clip_ops.append(
                    tf.assign(
                        var,
                        tf.clip_by_value(var, clip_bounds[0], clip_bounds[1])))
            D_clip_weights = tf.group(*clip_ops)
    elif args.wavegan_loss == 'wgan-gp':
        # Conditional G Loss
        G_loss = -tf.reduce_mean(D_G_z[0])
        G_loss += c_kl_loss

        # Unconditional G Loss
        if args.use_extra_uncond_loss:
            G_loss += -tf.reduce_mean(D_G_z[1])
            G_loss /= 2

        # Conditional D Loss
        D_loss_real = -tf.reduce_mean(D_x[0])
        D_loss_wrong = tf.reduce_mean(D_w[0])
        D_loss_fake = tf.reduce_mean(D_G_z[0])

        # Unconditional D Loss
        if args.use_extra_uncond_loss:
            D_loss_real_uncond = -tf.reduce_mean(D_x[1])
            D_loss_wrong_uncond = -tf.reduce_mean(D_w[1])
            D_loss_fake_uncond = tf.reduce_mean(D_G_z[1])

            D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) \
                   + 0.5 * (D_loss_real_uncond + D_loss_wrong_uncond) + D_loss_fake_uncond
            D_loss /= 2
        else:
            D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake)

        # Warmup Conditional Loss
        # D_warmup_loss = D_loss_real + D_loss_wrong

        # Conditional Gradient Penalty
        alpha = tf.random_uniform(shape=[args.train_batch_size, 1, 1],
                                  minval=0.,
                                  maxval=1.)
        real = x
        fake = tf.concat([
            G_z[:args.train_batch_size // 2],
            wrong_audio[:args.train_batch_size // 2]
        ], 0)
        differences = fake - real
        interpolates = real + (alpha * differences)
        with tf.name_scope('D_interp'), tf.variable_scope('D', reuse=True):
            D_interp = WaveGANDiscriminator(
                interpolates, lod,
                **args.wavegan_d_kwargs)[0]  # Only want conditional output
        gradients = tf.gradients(D_interp, [interpolates])[0]
        slopes = tf.sqrt(
            tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2]))
        cond_gradient_penalty = tf.reduce_mean((slopes - 1.)**2.)

        # Unconditional Gradient Penalty
        alpha = tf.random_uniform(shape=[args.train_batch_size, 1, 1],
                                  minval=0.,
                                  maxval=1.)
        real = tf.concat([
            x[:args.train_batch_size // 2],
            wrong_audio[:args.train_batch_size // 2]
        ], 0)
        fake = G_z
        differences = fake - real
        interpolates = real + (alpha * differences)
        with tf.name_scope('D_interp'), tf.variable_scope('D', reuse=True):
            D_interp = WaveGANDiscriminator(
                interpolates, lod,
                **args.wavegan_d_kwargs)[1]  # Only want unconditional output
        gradients = tf.gradients(D_interp, [interpolates])[0]
        slopes = tf.sqrt(
            tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2]))
        uncond_gradient_penalty = tf.reduce_mean((slopes - 1.)**2.)

        # Warmup Gradient Penalty
        # alpha = tf.random_uniform(shape=[args.train_batch_size, 1, 1], minval=0., maxval=1.)
        # real = x
        # fake = wrong_audio
        # differences = fake - real
        # interpolates = real + (alpha * differences)
        # with tf.name_scope('D_interp'), tf.variable_scope('D', reuse=True):
        #   D_interp = WaveGANDiscriminator(interpolates, lod, **args.wavegan_d_kwargs)[0] # Only want conditional output
        # gradients = tf.gradients(D_interp, [interpolates])[0]
        # slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2]))
        # warmup_gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2.)

        gradient_penalty = (cond_gradient_penalty +
                            uncond_gradient_penalty) / 2

        LAMBDA = 10
        D_loss += LAMBDA * gradient_penalty
        # D_warmup_loss += LAMBDA * warmup_gradient_penalty
    else:
        raise NotImplementedError()

    tf.summary.scalar('G_loss', G_loss)
    if (args.wavegan_loss == 'wgan-gp'):
        tf.summary.scalar('Gradient Penalty', LAMBDA * gradient_penalty)
    if (args.wavegan_loss == 'wgan' or args.wavegan_loss == 'wgan-gp'):
        if args.use_extra_uncond_loss:
            tf.summary.scalar('Critic Score - Real Data - Condition Match',
                              -D_loss_real)
            tf.summary.scalar('Critic Score - Fake Data - Condition Match',
                              D_loss_fake)
            tf.summary.scalar('Critic Score - Wrong Data - Condition Match',
                              D_loss_wrong)
            tf.summary.scalar('Critic Score - Real Data', -D_loss_real_uncond)
            tf.summary.scalar('Critic Score - Wrong Data',
                              -D_loss_wrong_uncond)
            tf.summary.scalar('Critic Score - Fake Data', D_loss_fake_uncond)
            tf.summary.scalar('Wasserstein Distance - No Regularization Term',
                              -((D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) \
                               + 0.5 * (D_loss_real_uncond + D_loss_wrong_uncond) + D_loss_fake_uncond) / 2))
            tf.summary.scalar('Wasserstein Distance - Real-Wrong Only',
                              -(D_loss_real + D_loss_wrong))
            tf.summary.scalar('Wasserstein Distance - Real-Fake Only',
                              -((D_loss_real + D_loss_fake \
                               + D_loss_real_uncond + D_loss_fake_uncond) / 2))
        else:
            tf.summary.scalar('Critic Score - Real Data', -D_loss_real)
            tf.summary.scalar('Critic Score - Wrong Data', D_loss_wrong)
            tf.summary.scalar('Critic Score - Fake Data', D_loss_fake)
            tf.summary.scalar(
                'Wasserstein Distance - No Regularization Term',
                -(D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake)))
        tf.summary.scalar('Wasserstein Distance - With Regularization Term',
                          -D_loss)
    else:
        if args.use_extra_uncond_loss:
            tf.summary.scalar('D_acc_uncond', 0.5 * ((0.5 * (tf.reduce_mean(tf.sigmoid(D_x[1])) + tf.reduce_mean(tf.sigmoid(D_w[1])))) \
                                                   + tf.reduce_mean(1 - tf.sigmoid(D_G_z[1]))))
            tf.summary.scalar('D_acc', 0.5 * (tf.reduce_mean(tf.sigmoid(D_x[0])) \
                                            + 0.5 * (tf.reduce_mean(1 - tf.sigmoid(D_w[0])) + tf.reduce_mean(1 - tf.sigmoid(D_G_z[0])))))
            tf.summary.scalar('D_acc_real_wrong_only', 0.5 * (tf.reduce_mean(tf.sigmoid(D_x[0])) \
                                                            + tf.reduce_mean(1 - tf.sigmoid(D_w[0]))))
            tf.summary.scalar('D_loss_cond_real', D_loss_real)
            tf.summary.scalar('D_loss_uncond_real', D_loss_real_uncond)
            tf.summary.scalar('D_loss_cond_wrong', D_loss_wrong)
            tf.summary.scalar('D_loss_uncond_wrong', D_loss_wrong_uncond)
            tf.summary.scalar('D_loss_cond_fake', D_loss_fake)
            tf.summary.scalar('D_loss_uncond_fake', D_loss_fake_uncond)
            tf.summary.scalar('D_loss_unregularized',
                               (D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) \
                              + 0.5 * (D_loss_real_uncond + D_loss_wrong_uncond) + D_loss_fake_uncond) / 2)
        else:
            tf.summary.scalar('D_acc', 0.5 * (tf.reduce_mean(tf.sigmoid(D_x[0])) \
                                            + 0.5 * (tf.reduce_mean(1 - tf.sigmoid(D_w[0])) + tf.reduce_mean(1 - tf.sigmoid(D_G_z[0])))))
            tf.summary.scalar('D_loss_real', D_loss_real)
            tf.summary.scalar('D_loss_wrong', D_loss_wrong)
            tf.summary.scalar('D_loss_fake', D_loss_fake)
            tf.summary.scalar('D_loss_unregularized',
                              D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake))
        tf.summary.scalar('D_loss', D_loss)

    # Create (recommended) optimizer
    if args.wavegan_loss == 'dcgan':
        G_opt = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5)
        D_opt = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5)
    elif args.wavegan_loss == 'lsgan':
        G_opt = tf.train.RMSPropOptimizer(learning_rate=1e-4)
        D_opt = tf.train.RMSPropOptimizer(learning_rate=1e-4)
    elif args.wavegan_loss == 'wgan':
        G_opt = tf.train.RMSPropOptimizer(learning_rate=5e-5)
        D_opt = tf.train.RMSPropOptimizer(learning_rate=5e-5)
    elif args.wavegan_loss == 'wgan-gp':
        G_opt = tf.train.AdamOptimizer(learning_rate=4e-4,
                                       beta1=0.0,
                                       beta2=0.9)
        D_opt = tf.train.AdamOptimizer(learning_rate=4e-4,
                                       beta1=0.0,
                                       beta2=0.9)
    else:
        raise NotImplementedError()

    # Optimizer internal state reset ops
    reset_G_opt_op = tf.variables_initializer(G_opt.variables())
    reset_D_opt_op = tf.variables_initializer(D_opt.variables())

    # Create training ops
    G_train_op = G_opt.minimize(
        G_loss,
        var_list=G_vars,
        global_step=tf.train.get_or_create_global_step())
    D_train_op = D_opt.minimize(D_loss, var_list=D_vars)

    def smoothstep(x, mi, mx):
        return mi + (mx - mi) * (lambda t: np.where(
            t < 0, 0, np.where(t <= 1, 3 * t**2 - 2 * t**3, 1)))(x)

    def np_lerp_clip(t, a, b):
        return a + (b - a) * np.clip(t, 0.0, 1.0)

    def get_lod_at_step(step):
        return np.piecewise(float(step), [
            step < 10000, 10000 <= step < 20000, 20000 <= step < 30000,
            30000 <= step < 40000, 40000 <= step < 50000,
            50000 <= step < 60000, 60000 <= step < 70000,
            70000 <= step < 80000, 80000 <= step < 90000,
            90000 <= step < 100000
        ], [
            0, lambda x: np_lerp_clip((x - 10000) / 10000, 0, 1), 1,
            lambda x: np_lerp_clip(
                (x - 30000) / 10000, 1, 2), 2, lambda x: np_lerp_clip(
                    (x - 50000) / 10000, 2, 3), 3, lambda x: np_lerp_clip(
                        (x - 70000) / 10000, 3, 4), 4, lambda x: np_lerp_clip(
                            (x - 90000) / 10000, 4, 5), 5
        ])
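
    # Worked values for the LOD schedule above (illustrative only, using the
    # np_lerp_clip helper defined a few lines up):
    #   get_lod_at_step(5000)  -> 0.0  (step < 10000)
    #   get_lod_at_step(15000) -> 0.5  (np_lerp_clip((15000 - 10000) / 10000, 0, 1))
    #   get_lod_at_step(25000) -> 1.0  (plateau between 20000 and 30000)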

    def my_filter_callable(datum, tensor):
        if (not isinstance(tensor, debug_data.InconvertibleTensorProto)) and (
                tensor.dtype == np.float32 or tensor.dtype == np.float64):
            return np.any([
                np.any(np.greater_equal(tensor, 50.0)),
                np.any(np.less_equal(tensor, -50.0))
            ])
        else:
            return False

    # Create a LocalCLIDebugHook and use it as a monitor
    # debug_hook = tf_debug.LocalCLIDebugHook(dump_root='C:/d/t/')
    # debug_hook.add_tensor_filter('large_values', my_filter_callable)
    # hooks = [debug_hook]

    # Run training
    with tf.train.MonitoredTrainingSession(
            checkpoint_dir=args.train_dir,
            save_checkpoint_secs=args.train_save_secs,
            save_summaries_secs=args.train_summary_secs) as sess:
        # Get the summary writer for writing extra summary statistics
        summary_writer = SummaryWriterCache.get(args.train_dir)

        cur_lod = 0
        while True:
            # Calculate Maximum LOD to train
            step = sess.run(tf.train.get_or_create_global_step(),
                            feed_dict={lod: cur_lod})
            cur_lod = get_lod_at_step(step)
            prev_lod = get_lod_at_step(step - 1)

            # Reset optimizer internal state when new layers are introduced
            if np.floor(cur_lod) != np.floor(prev_lod) or np.ceil(
                    cur_lod) != np.ceil(prev_lod):
                print(
                    "Resetting optimizers' internal states at step {}".format(
                        step))
                sess.run([reset_G_opt_op, reset_D_opt_op],
                         feed_dict={lod: cur_lod})

            # Output current LOD and 'steps at current LOD' to TensorBoard
            step = float(
                sess.run(tf.train.get_or_create_global_step(),
                         feed_dict={lod: cur_lod}))
            lod_summary = tf.Summary(value=[
                tf.Summary.Value(tag="current_lod",
                                 simple_value=float(cur_lod)),
            ])
            summary_writer.add_summary(lod_summary, step)

            # Train discriminator
            for i in xrange(args.wavegan_disc_nupdates):
                sess.run(D_train_op, feed_dict={lod: cur_lod})

                # Enforce Lipschitz constraint for WGAN
                if D_clip_weights is not None:
                    sess.run(D_clip_weights, feed_dict={lod: cur_lod})

            # Train generator
            sess.run(G_train_op, feed_dict={lod: cur_lod})
 def __init__(self, every_n_steps=100, output_dir=None, summary_writer=None):
   self._summary_tag = "global_step/sec"
   self._every_n_steps = every_n_steps
   self._summary_writer = summary_writer
   if summary_writer is None and output_dir:
     self._summary_writer = SummaryWriterCache.get(output_dir)
Example #25
def exp(dir_prefix,
        tf_log_dir="ckpt",
        our_log_dir="logging",
        replay_cache_dir="ReplayBufferCache",
        gpu_mem_fraction=0.15,
        save_checkpoint_secs=3600):
    n_skip = 6
    n_stack = 3
    if_random_phase = True
    # === Agent
    # --- agent basic
    ALL_ACTIONS = [(ord(mode), ) for mode in ['s', 'd', 'a']] + [(0, )]
    AGENT_ACTIONS = ALL_ACTIONS[:3]
    num_actions = len(AGENT_ACTIONS)
    noop = 3
    gamma = 0.9
    greedy_epsilon = CappedLinear(int(3e4), 0.2, 0.05)
    # --- replay buffer
    # replay_upsample_bias = (1, 1, 1, 0.1)
    # --- NN architecture
    f_net = lambda inputs: f_dueling_q(inputs, num_actions)
    if_ddqn = True
    # --- optimization
    batch_size = 8
    learning_rate = 1e-4
    target_sync_interval = 1
    target_sync_rate = 1e-3
    update_interval = 1
    max_grad_norm = 1.0
    sample_mimimum_count = 100
    update_ratio = 8.0
    # --- logging and ckpt
    replay_capacity = 300
    replay_ratio_active = 1.0

    # ===  Reward function
    class FuncReward(object):
        def __init__(self, gamma):
            self.__gamma = gamma
            self._ema_speed = 10.0
            self._ema_dist = 0.0
            self._obs_risk = 0.0
            self._road_change = False
            self._mom_opp = 0.0
            self._mom_biking = 0.0
            self._steering = False
            self._waiting_steps = 0

        def reset(self):
            self._ema_speed = 10.0
            self._ema_dist = 0.0
            self._obs_risk = 0.0
            self._road_change = False
            self._mom_opp = 0.0
            self._mom_biking = 0.0
            self._steering = False

        def _func_scalar_reward(self, rewards, action):
            """Coverts a vector reward into a scalar."""
            info = {}

            # append a reward that is 1 when action is lane switching
            rewards = rewards.tolist()
            print(' ' * 3 + 'R: [' + '{:4.2f} ' * len(rewards) +
                  ']').format(*rewards),

            # extract relevant rewards.
            speed = rewards[0]
            dist = rewards[1]
            obs_risk = rewards[2]
            # road_invalid = rewards[3] > 0.01  # any yellow or red
            road_change = rewards[4] > 0.01  # entering intersection
            opp = rewards[5]
            biking = rewards[6]
            # inner = rewards[7]
            # outter = rewards[8]
            steer = np.logical_or(action == 1, action == 2)

            if speed < 0.1:
                self._waiting_steps += 1
            else:
                self._waiting_steps = 0

            # update reward-related state vars
            ema_speed = 0.5 * self._ema_speed + 0.5 * speed
            ema_dist = 1.0 if dist > 2.0 else 0.9 * self._ema_dist
            mom_opp = min((opp < 0.5) * (self._mom_opp + 1), 20)
            mom_biking = min((biking > 0.5) * (self._mom_biking + 1), 12)
            steering = steer if action != 3 else self._steering
            self._ema_speed = ema_speed
            self._ema_dist = ema_dist
            self._obs_risk = obs_risk
            self._road_change = road_change
            self._mom_opp = mom_opp
            self._mom_biking = mom_biking
            self._steering = steering
            print '{:3.0f}, {:3.0f}, {:4.2f}, {:3.0f}'.format(
                mom_opp, mom_biking, ema_dist, self._steering),
            info['reward_fun/speed'] = speed
            info['reward_fun/dist2longest'] = dist
            info['reward_fun/obs_risk'] = obs_risk
            info['reward_fun/road_change'] = road_change
            info['reward_fun/on_opposite'] = opp
            info['reward_fun/on_biking'] = biking
            info['reward_fun/steer'] = steer
            info['reward_fun/mom_opposite'] = mom_opp
            info['reward_fun/mom_biking'] = mom_biking
            info['waiting_steps'] = self._waiting_steps

            # calculate scalar reward
            reward = [
                # velocity
                speed * 10 - 10,
                # obs factor
                -100.0 * obs_risk,
                # opposite
                -20 * (0.9 + 0.1 * mom_opp) * (mom_opp > 1.0),
                # ped
                -40 * (0.9 + 0.1 * mom_biking) * (mom_biking > 1.0),
                # steer
                steering * -40.0,
            ]
            reward = np.sum(reward) / 100.0
            print ': {:5.2f}'.format(reward)

            return reward, info

        def _func_early_stopping(self):
            """Several early stopping criterion."""
            info = {}
            done = False
            # switched lane while going into intersection.
            if self._road_change and self._ema_dist > 0.2:
                print "[Episode early stopping] turned into intersection."
                done = True
                info['banned_road_change'] = True

            # used biking lane to cross intersection
            if self._road_change and self._mom_biking > 0:
                print "[Episode early stopping] entered intersection on biking lane."
                done = True
                info['banned_road_change'] = True

            # hit obstacle
            if self._obs_risk > 1.0:
                print "[Episode early stopping] hit obstacle."
                done = True

            # waiting too long
            if self._waiting_steps > 80:
                print "[Episode early stopping] waiting too long"
                done = True

            return done, info

        def _func_skipping_bias(self,
                                reward,
                                done,
                                info,
                                n_skip=1,
                                cnt_skip=0):
            new_info = {}
            if 'banned_road_change' in info:
                reward -= 1.0 * (n_skip - cnt_skip)
            if done:
                pass
                # reward /= (1 - self.__gamma) / (n_skip - cnt_skip)
            new_info['reward_fun/reward'] = reward
            return reward, new_info

        def __call__(self, action, rewards, done, n_skip=1, cnt_skip=0):
            info = {}
            reward, info_diff = self._func_scalar_reward(rewards, action)
            info.update(info_diff)
            early_done, info_diff = self._func_early_stopping()
            done = done | early_done
            info.update(info_diff)
            reward, info_diff = self._func_skipping_bias(
                reward, done, info, n_skip, cnt_skip)
            info.update(info_diff)
            if done:
                info['flag_success'] = reward > 0.0
                self.reset()

            return reward, done, info

    # ==========================================
    # ==========================================
    # ==========================================

    env, replay_buffer, _agent = None, None, None
    try:
        # Parse flags
        tf_log_dir = os.sep.join([dir_prefix, tf_log_dir])
        our_log_dir = os.sep.join([dir_prefix, our_log_dir])
        replay_cache_dir = os.sep.join([dir_prefix, replay_cache_dir])

        summary_writer = SummaryWriterCache.get(tf_log_dir)

        # global_step = tf.get_variable(
        #     'global_step', [], dtype=tf.int32,
        #     initializer=tf.constant_initializer(0), trainable=False)

        # Environment
        def gen_default_backend_cmds():
            ws_path = '/Projects/catkin_ws/'
            initialD_path = '/Projects/hobotrl/playground/initialD/'
            backend_path = initialD_path + 'ros_environments/backend_scripts/'
            utils_path = initialD_path + 'ros_environments/backend_scripts/utils/'
            backend_cmds = [
                ['python', utils_path + 'iterate_test_case.py'],
                # Parse maps
                [
                    'python', utils_path + 'parse_map.py',
                    ws_path + 'src/Map/src/map_api/data/honda_wider.xodr',
                    utils_path + 'road_segment_info.txt'
                ],
                # Start roscore
                ['roscore'],
                # Reward function script
                ['python', backend_path + 'gazebo_rl_reward.py'],
                # Road validity node script
                [
                    'python', backend_path + 'road_validity.py',
                    utils_path + 'road_segment_info.txt.signal'
                ],
                # Simulation restarter backend
                ['python', backend_path + 'rviz_restart.py', 'next.launch'],
                # Video capture
                ['python', backend_path + 'car_go.py', '--use-dummy-action']
            ]
            return backend_cmds

        env = DrSimRuleDecisionK8S(
            image_uri='docker.hobot.cc/carsim/simulator_gpu_kub:0.0.10_384.111',
            backend_cmds=gen_default_backend_cmds(),
            is_dummy_action=True)
        # Agent
        state_shape = env.observation_space.shape
        # Utilities
        stepsSaver = StepsSaver(our_log_dir)
        reward_vector2scalar = FuncReward(gamma)
        # Configure sess
        n_ep = 0
        n_total_steps = 0
        # GoGoGo
        while n_total_steps <= 2.5e5:
            cum_reward = 0.0
            n_ep_steps = 0
            state_action = env.reset()
            state, action = state_action
            print "action: ", action
            # print "state shape: {}".format(state.shape)
            while True:
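                # The backend was started with --use-dummy-action, so the
                # constant action index 3 (the no-op) is presumably ignored by
                # the simulator here.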
                next_state_action, vec_reward, done, env_info = env.step(3)
                next_state, next_action = next_state_action
                reward, done, reward_info = reward_vector2scalar(
                    action, vec_reward, done)
                env_info.update(reward_info)
                summary_proto = log_info(
                    {},
                    env_info,
                    done,
                    cum_reward,
                    n_ep,
                    n_ep_steps,
                    n_total_steps,
                )
                summary_writer.add_summary(summary_proto, n_total_steps)
                n_total_steps += 1
                n_ep_steps += 1
                cum_reward += reward
                flag_success = reward_info.get('flag_success', False)
                stepsSaver.save(n_ep, n_ep_steps, state, action, vec_reward,
                                reward, done, cum_reward, flag_success)
                state, action = next_state, next_action
                print "action: ", action
                if done:
                    n_ep += 1
                    logging.warning(
                        "Episode {} finished in {} steps, reward is {}.".
                        format(
                            n_ep,
                            n_ep_steps,
                            cum_reward,
                        ))
                    break
            if n_ep >= 100:
                break

    except Exception as e:
        print e.message
        traceback.print_exc()
    finally:
        logging.warning("=" * 30)
        logging.warning("=" * 30)
        logging.warning("Tidying up...")
        # kill orphaned monitor daemon process
        if env is not None:
            env.env.exit()
        if replay_buffer is not None:
            replay_buffer.close()
        if _agent is not None:
            _agent.stop()
        # os.killpg(os.getpgid(os.getpid()), signal.SIGKILL)
        import time
        logging.warning("waiting for k8s end")
        time.sleep(180)
        logging.warning("=" * 30)
Example #26
# Loss
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                      labels=labels_t)
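# Note: this loss is a per-example vector; minimize() below differentiates it
# directly, which implicitly sums the per-example losses over the batch.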
# Optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=config['lr'])
global_step_t = tf.train.create_global_step()
train_op = optimizer.minimize(loss, global_step=global_step_t)

# Session
sess = tf.train.MonitoredTrainingSession(checkpoint_dir=logdir,
                                         save_checkpoint_secs=60,
                                         save_summaries_steps=50)

# Summaries
summary_writer = SummaryWriterCache.get(logdir)
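# The hooks created by MonitoredTrainingSession above fetch their writer via
# SummaryWriterCache.get as well, so this returns the same shared instance
# rather than opening a second events file for logdir.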
metrics_logger = experiment_logging.TensorboardLogger(writer=summary_writer)
shutil.copyfile(config_path, logdir + '/config.py')  # save config in logdir

# Fetch entire dev set (no need to do this inside the eval loop repeatedly)
image_dev, question_dev, label_dev = next(batcher_dev)
dev_feed_dict = {
    images_t: image_dev,
    questions_t: question_dev,
    labels_t: label_dev
}

# Train-Eval loop
while True:
    image, question, label = next(batcher_train)
    current_step, train_loss, _ = sess.run(
        [global_step_t, loss, train_op],
        # assumed train feed dict, mirroring dev_feed_dict above
        feed_dict={images_t: image, questions_t: question, labels_t: label})
Example #27
def train():
  """Running the main training loop with given parameters."""
  if FLAGS.task == 0 and not tf.gfile.Exists(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  # Read train/dev/test graphs, create datasets and model
  add_inverse_edge = FLAGS.model in \
                     ["source_rel_attention", "source_path_attention"]
  train_graph, train_data = read_graph_data(
      kg_file=FLAGS.kg_file,
      add_reverse_graph=not add_inverse_edge,
      add_inverse_edge=add_inverse_edge,
      mode="train",
      num_epochs=FLAGS.num_epochs, batchsize=FLAGS.batchsize,
      max_neighbors=FLAGS.max_neighbors,
      max_negatives=FLAGS.max_negatives,
      text_kg_file=FLAGS.text_kg_file
  )

  worker_device = "/job:{}".format(FLAGS.brain_job_name)
  with tf.device(
      tf.train.replica_device_setter(
          FLAGS.ps_tasks, worker_device=worker_device)):
    iterator = train_data.dataset.make_one_shot_iterator()
    candidate_scores, _, labels, model, is_train_ph, _ = create_model(
        train_graph, iterator
    )

  # Create train loss and training op
  loss = losses.softmax_crossentropy(logits=candidate_scores, labels=labels)
  optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
  global_step = tf.Variable(0, name="global_step", trainable=False)
  train_op = get_train_op(loss, optimizer, FLAGS.grad_clip,
                          global_step=global_step)
  tf.summary.scalar("Loss", loss)

  run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)
  session_config = tf.ConfigProto(log_device_placement=True)
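  # Both flags are debugging aids: report_tensor_allocations_upon_oom dumps
  # live tensor allocations on OOM, log_device_placement logs op placements.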

  # Create tf training session
  scaffold = tf.train.Scaffold(saver=tf.train.Saver(max_to_keep=1000))
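  # The Saver on this scaffold (max_to_keep=1000) is what the
  # MonitoredTrainingSession below uses for checkpointing, so up to 1000
  # checkpoints are retained instead of the default 5.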
  # ckpt_hook = tf.train.CheckpointSaverHook(
  #     checkpoint_dir=FLAGS.output_dir, scaffold=scaffold,
  #     save_steps=FLAGS.save_every
  # )
  # summary_hook = tf.train.SummarySaverHook(
  #     save_secs=60, output_dir=FLAGS.output_dir,
  #     summary_op=tf.summary.merge_all()
  # )
  session = tf.train.MonitoredTrainingSession(
      master=FLAGS.master,
      is_chief=(FLAGS.task == 0),
      checkpoint_dir=FLAGS.output_dir,
      save_checkpoint_steps=FLAGS.save_every,
      scaffold=scaffold,
      save_summaries_secs=60,
      # hooks=[summary_hook],
      # chief_only_hooks=[ckpt_hook],
      config=session_config
  )

  # Create embeddings visualization
  if FLAGS.task == 0:
    utils.save_embedding_vocabs(FLAGS.output_dir, train_graph,
                                FLAGS.entity_names_file)
    pconfig = projector.ProjectorConfig()
    add_embedding_to_projector(
        pconfig, model["entity_encoder"].embeddings.name.split(":")[0],
        os.path.join(FLAGS.output_dir, "entity_vocab.tsv")
    )
    add_embedding_to_projector(
        pconfig, model["relation_encoder"].embeddings.name.split(":")[0],
        os.path.join(FLAGS.output_dir, "relation_vocab.tsv")
    )
    if FLAGS.text_kg_file:
      word_embeddings = model["text_encoder"].word_embedding_encoder.embeddings
      add_embedding_to_projector(
          pconfig, word_embeddings.name.split(":")[0],
          os.path.join(FLAGS.output_dir, "word_vocab.tsv")
      )
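    # visualize_embeddings writes a projector_config.pbtxt into the cached
    # writer's logdir so TensorBoard's Projector tab can find the vocab files
    # saved above.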
    projector.visualize_embeddings(
        SummaryWriterCache.get(FLAGS.output_dir), pconfig
    )

  # Main training loop
  running_total_loss = 0.
  nsteps = 0
  gc.collect()
  while True:
    try:
      current_loss, _, _ = session.run(
          [loss, train_op, global_step],
          # feed_dict={is_train_ph: True, handle: train_iterator_handle},
          feed_dict={is_train_ph: True},
          options=run_options
      )
      nsteps += 1
      running_total_loss += current_loss
      tf.logging.info("Step %d, loss: %.3f, running avg loss: %.3f",
                      nsteps, current_loss, running_total_loss / nsteps)
      if nsteps % 2 == 0:
        gc.collect()
    except tf.errors.OutOfRangeError:
      tf.logging.info("End of Traning Epochs after %d steps", nsteps)
      break
Example #28
def exp(dir_prefix,
        tf_log_dir="ckpt",
        our_log_dir="logging",
        replay_cache_dir="ReplayBufferCache",
        gpu_mem_fraction=0.15,
        save_checkpoint_secs=3600):
    n_skip = 6
    n_stack = 3
    if_random_phase = True
    # === Agent
    # --- agent basic
    ALL_ACTIONS = [(ord(mode), ) for mode in ['s', 'd', 'a']] + [(0, )]
    AGENT_ACTIONS = ALL_ACTIONS[:3]
    num_actions = len(AGENT_ACTIONS)
    noop = 3
    gamma = 0.9
    ckpt_step = 0
    greedy_epsilon = CappedLinear(
        int(3e4) - ckpt_step, 0.2 - (0.15 / 3e4 * ckpt_step), 0.05)
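    # CappedLinear is assumed to anneal epsilon linearly from the second
    # argument down to the floor of 0.05 over the given number of steps,
    # resuming the schedule from ckpt_step.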
    start_step = ckpt_step * 6
    # --- replay buffer
    replay_bucket_size = 100
    replay_max_sample_epoch = 2
    # replay_upsample_bias = (1, 1, 1, 0.1)
    # --- NN architecture
    f_net = lambda inputs: f_dueling_q(inputs, num_actions)
    if_ddqn = True
    # --- optimization
    batch_size = 8
    learning_rate = 1e-4
    target_sync_interval = 1
    target_sync_rate = 1e-3
    update_interval = 1
    max_grad_norm = 1.0
    sample_minimum_count = 100
    update_ratio = 8.0
    # --- logging and ckpt
    replay_capacity = 300
    replay_ratio_active = 1.0

    # ===  Reward function
    class FuncReward(object):
        def __init__(self, gamma):
            self.__gamma = gamma
            self._ema_speed = 10.0
            self._ema_dist = 0.0
            self._obs_risk = 0.0
            self._road_change = False
            self._mom_opp = 0.0
            self._mom_biking = 0.0
            self._steering = False
            self._waiting_steps = 0

        def reset(self):
            self._ema_speed = 10.0
            self._ema_dist = 0.0
            self._obs_risk = 0.0
            self._road_change = False
            self._mom_opp = 0.0
            self._mom_biking = 0.0
            self._steering = False
            self._waiting_steps = 0

        def _func_scalar_reward(self, rewards, action):
            """Coverts a vector reward into a scalar."""
            info = {}

            # print the raw reward vector for inspection
            rewards = rewards.tolist()
            print(' ' * 3 + 'R: [' + '{:4.2f} ' * len(rewards) +
                  ']').format(*rewards),

            # extract relevant rewards.
            speed = rewards[0]
            dist = rewards[1]
            obs_risk = rewards[2]
            # road_invalid = rewards[3] > 0.01  # any yellow or red
            road_change = rewards[4] > 0.01  # entering intersection
            opp = rewards[5]
            biking = rewards[6]
            # inner = rewards[7]
            # outer = rewards[8]
            steer = np.logical_or(action == 1, action == 2)

            if speed < 0.1:
                self._waiting_steps += 1
            else:
                self._waiting_steps = 0

            # update reward-related state vars
            ema_speed = 0.5 * self._ema_speed + 0.5 * speed
            ema_dist = 1.0 if dist > 2.0 else 0.9 * self._ema_dist
            mom_opp = min((opp < 0.5) * (self._mom_opp + 1), 1)
            mom_biking = min((biking > 0.5) * (self._mom_biking + 1), 1)
            steering = steer if action != 3 else self._steering
            self._ema_speed = ema_speed
            self._ema_dist = ema_dist
            self._obs_risk = obs_risk
            self._road_change = road_change
            self._mom_opp = mom_opp
            self._mom_biking = mom_biking
            self._steering = steering
            print '{:3.0f}, {:3.0f}, {:4.2f}, {:3.0f}'.format(
                mom_opp, mom_biking, ema_dist, self._steering),
            info['reward_fun/speed'] = speed
            info['reward_fun/dist2longest'] = dist
            info['reward_fun/obs_risk'] = obs_risk
            info['reward_fun/road_change'] = road_change
            info['reward_fun/on_opposite'] = opp
            info['reward_fun/on_biking'] = biking
            info['reward_fun/steer'] = steer
            info['reward_fun/mom_opposite'] = mom_opp
            info['reward_fun/mom_biking'] = mom_biking
            info['waiting_steps'] = self._waiting_steps

            # calculate scalar reward
            reward = [
                # velocity
                speed * 10 - 10,
                # obs factor
                -100.0 * obs_risk,
                # opposite
                -10 * (0.9 + 0.1 * mom_opp) * (mom_opp > 0.99),
                # ped
                -10 * (0.9 + 0.1 * mom_biking) * (mom_biking > 0.99),
                # steer
                steering * -40.0,
            ]
            reward = np.sum(reward) / 100.0
            print ': {:5.2f}'.format(reward)

            return reward, info

        def _func_early_stopping(self):
            """Several early stopping criterion."""
            info = {}
            done = False
            # switched lane while going into intersection.
            if self._road_change and self._ema_dist > 0.2:
                print "[Episode early stopping] turned into intersection."
                done = True
                info['banned_road_change'] = True

            # used biking lane to cross intersection
            if self._road_change and self._mom_biking > 0:
                print "[Episode early stopping] entered intersection on biking lane."
                done = True
                info['banned_road_change'] = True

            # hit obstacle
            if self._obs_risk > 1.0:
                print "[Episode early stopping] hit obstacle."
                done = True

            # waiting too long
            if self._waiting_steps > 80:
                print "[Episode early stopping] waiting too long"
                done = True

            return done, info

        def _func_skipping_bias(self, reward, done, info, n_skip, cnt_skip):
            new_info = {}
            if 'banned_road_change' in info:
                reward -= 1.0 * (n_skip - cnt_skip)
            if done:
                pass
                # reward /= (1 - self.__gamma) / (n_skip - cnt_skip)
            new_info['reward_fun/reward'] = reward
            return reward, new_info

        def __call__(self, action, rewards, done, n_skip=1, cnt_skip=0):
            info = {}
            reward, info_diff = self._func_scalar_reward(rewards, action)
            info.update(info_diff)
            early_done, info_diff = self._func_early_stopping()
            done = done | early_done
            info.update(info_diff)
            reward, info_diff = self._func_skipping_bias(
                reward, done, info, n_skip, cnt_skip)
            info.update(info_diff)
            if done:
                info['flag_success'] = reward > 0.0
                self.reset()

            return reward, done, info

    # ==========================================
    # ==========================================
    # ==========================================

    env, replay_buffer, _agent = None, None, None
    try:
        # Parse flags
        # FLAGS = tf.app.flags.FLAGS
        tf_log_dir = os.sep.join([dir_prefix, tf_log_dir])
        our_log_dir = os.sep.join([dir_prefix, our_log_dir])
        replay_cache_dir = os.sep.join([dir_prefix, replay_cache_dir])

        # Modify tf graph
        graph = tf.get_default_graph()
        # -- create learning rate var and optimizer
        lr = tf.get_variable('learning_rate', [],
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(1e-3),
                             trainable=False)
        lr_in = tf.placeholder(dtype=tf.float32)
        op_set_lr = tf.assign(lr, lr_in)
        optimizer_td = tf.train.AdamOptimizer(learning_rate=lr)
        # -- create global step variable
        global_step = tf.get_variable('global_step', [],
                                      dtype=tf.int32,
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        def gen_default_backend_cmds():
            ws_path = '/Projects/catkin_ws/'
            initialD_path = '/Projects/hobotrl/playground/initialD/'
            backend_path = initialD_path + 'ros_environments/backend_scripts/'
            utils_path = initialD_path + 'ros_environments/backend_scripts/utils/'
            backend_cmds = [
                ['python', utils_path + 'iterate_test_case.py'],
                # Parse maps
                [
                    'python', utils_path + 'parse_map.py',
                    ws_path + 'src/Map/src/map_api/data/honda_wider.xodr',
                    utils_path + 'road_segment_info.txt'
                ],
                # Start roscore
                ['roscore'],
                # Reward function script
                ['python', backend_path + 'gazebo_rl_reward.py'],
                # Road validity node script
                [
                    'python', backend_path + 'road_validity.py',
                    utils_path + 'road_segment_info.txt.signal'
                ],
                # Simulation restarter backend
                ['python', backend_path + 'rviz_restart.py', 'next.launch'],
            ]
            return backend_cmds

        # Environment
        env = FrameStack(
            DrSimDecisionK8S(backend_cmds=gen_default_backend_cmds()), n_stack)
        # Agent
        replay_buffer = BigPlayback(
            bucket_cls=MapPlayback,
            cache_path=replay_cache_dir,
            capacity=replay_capacity,
            bucket_size=replay_bucket_size,
            ratio_active=replay_ratio_active,
            max_sample_epoch=replay_max_sample_epoch,
        )
        state_shape = env.observation_space.shape
        __agent = DQN(
            f_create_q=f_net,
            state_shape=state_shape,
            # OneStepTD arguments
            num_actions=num_actions,
            discount_factor=gamma,
            ddqn=if_ddqn,
            # target network sync arguments
            target_sync_interval=target_sync_interval,
            target_sync_rate=target_sync_rate,
            # epsilon greedy arguments
            greedy_epsilon=greedy_epsilon,
            # optimizer arguments
            network_optimizer=LocalOptimizer(optimizer_td, max_grad_norm),
            # sampler arguments
            sampler=TransitionSampler(replay_buffer,
                                      batch_size=batch_size,
                                      interval=update_interval,
                                      minimum_count=sample_minimum_count),
            # checkpoint
            global_step=global_step)
        # Utilities
        stepsSaver = StepsSaver(our_log_dir)
        reward_vector2scalar = FuncReward(gamma)
        # Configure sess
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = gpu_mem_fraction
        with __agent.create_session(
                config=config, save_dir=tf_log_dir,
                save_checkpoint_secs=save_checkpoint_secs) as sess, \
            AsynchronousAgent(
                agent=__agent, method='ratio', ratio=update_ratio) as _agent:
            agent = SkippingAgent(
                # n_skip_vec=(2, 6, 6),
                agent=_agent,
                n_skip=n_skip,
                specific_act=noop)
            summary_writer = SummaryWriterCache.get(tf_log_dir)
            # set vars
            sess.run(op_set_lr, feed_dict={lr_in: learning_rate})
            print "Using learning rate {}".format(sess.run(lr))
            n_ep = 0
            n_total_steps = start_step
            # GoGoGo
            while n_total_steps <= 2.5e5:
                cum_reward = 0.0
                n_ep_steps = 0
                state = env.reset()
                while True:
                    action = agent.act(state, exploration=False)
                    if action != 3:
                        print_qvals(n_ep_steps, __agent, state, action,
                                    AGENT_ACTIONS)
                    next_state, vec_reward, done, env_info = env.step(action)
                    reward, done, reward_info = reward_vector2scalar(
                        action, vec_reward, done, agent.n_skip, agent.cnt_skip)
                    agent_info = agent.step(sess=sess,
                                            state=state,
                                            action=action,
                                            reward=reward,
                                            next_state=next_state,
                                            episode_done=done,
                                            learning_off=True)
                    env_info.update(reward_info)
                    summary_proto = log_info(
                        agent_info,
                        env_info,
                        done,
                        cum_reward,
                        n_ep,
                        n_ep_steps,
                        n_total_steps,
                    )
                    summary_writer.add_summary(summary_proto, n_total_steps)
                    n_total_steps += 1
                    n_ep_steps += 1
                    cum_reward += reward
                    flag_success = reward_info.get('flag_success', False)
                    stepsSaver.save(n_ep, n_total_steps, state, action,
                                    vec_reward, reward, done, cum_reward,
                                    flag_success)
                    state = next_state
                    if done:
                        n_ep += 1
                        logging.warning(
                            "Episode {} finished in {} steps, reward is {}.".
                            format(
                                n_ep,
                                n_ep_steps,
                                cum_reward,
                            ))
                        break
                if n_ep >= 100:
                    break

    except Exception as e:
        print e.message
        traceback.print_exc()
    finally:
        logging.warning("=" * 30)
        logging.warning("=" * 30)
        logging.warning("Tidying up...")
        # kill orphaned monitor daemon process
        if env is not None:
            env.env.exit()
        if replay_buffer is not None:
            replay_buffer.close()
        if _agent is not None:
            _agent.stop()
        # os.killpg(os.getpgid(os.getpid()), signal.SIGKILL)
        import time
        logging.warning("waiting for k8s end")
        time.sleep(180)
        logging.warning("=" * 30)
Example #29
def run_experiment(
    random,
    model_fn,
    dataset_fn,
    optimizer,
    train_batch_size,
    dev_batch_size,
    max_context_len,
    max_answer_len,
    max_question_len,
    eval_every_steps,
    dropout,
    logdir,
    **unused
):
    basic_metrics = {
        'f1_score': f1_score,
        'precision_score': precision_score,
        'recall_score': recall_score,
    }
    train_data, dev_data, misc = dataset_fn()
    assert len(train_data) > 0 and len(dev_data) > 0
    word2id = misc['word2id']
    embedding_matrix = misc['embedding_matrix']

    all_training_data = np.array(
        sum([dataset for dataset in train_data.values()], [])
    )
    # all_dev_data = np.array( for small dev batches
    #     sum([dataset for dataset in dev_data.values()], [])
    # )

    # Graph inputs
    context_t = tf.placeholder(tf.int32, [None, max_context_len], name='context_t')
    context_t_length = tf.placeholder(tf.int32, [None], name='context_t_length')

    question_t = tf.placeholder(tf.int32, [None, max_question_len], name='question_t')
    question_t_length = tf.placeholder(tf.int32, [None], name='question_t_length')

    span2position = data_ops.make_span2position(
        seq_size=max_context_len,
        max_len=max_answer_len
    )
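    # span2position is assumed to map every candidate (start, end) span of up
    # to max_answer_len tokens to a flat index; its size fixes the width of
    # the span mask and label placeholders below.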

    span_mask_t = tf.placeholder(tf.int32, [None, len(span2position)], name='span_mask_t')

    label_t = tf.placeholder(tf.float32, [None, len(span2position)], name='label_t')

    is_training = tf.placeholder(tf.bool, name='is_training_flag')

    position2span = {v: k for k, v in span2position.items()}
    id2word = {v: k for k, v in word2id.items()}

    # Model outputs
    logits_t, *the_rest = model_fn(
        context_t,
        context_t_length,
        question_t,
        question_t_length,
        span2position,
        embedding_matrix,
        span_mask_t,
        is_training
    )

    # Build a mask which masks out-of-bound spans
    span_mask = tf.cast(span_mask_t, tf.float32)

    # Mask the logits of spans which shouldn't be considered
    logits_t *= span_mask

    logit_min = tf.reduce_min(logits_t, axis=1, keepdims=True)
    logits_t -= logit_min
    logits_t *= span_mask

    # Find the indexes of the predicted spans
    y_preds = tf.argmax(logits_t, axis=1)

    # For numerical stability reasons subtract the max
    logit_max = tf.reduce_max(logits_t, axis=1, keepdims=True)
    logits_t -= logit_max
    logits_t *= span_mask

    # Negative log likelihood (i.e. multiclass cross-entropy) loss
    exp_logits_t = tf.exp(logits_t) * span_mask
    log_sum_exp_logits_t = tf.log(tf.reduce_sum(exp_logits_t, axis=1) + 1e-7)

    gather_mask = tf.one_hot(y_preds, depth=logits_t.get_shape()[-1], dtype=tf.bool, on_value=True, off_value=False)
    y_logits = tf.boolean_mask(logits_t, gather_mask)

    xents = log_sum_exp_logits_t - y_logits
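    # xents above is log(sum_j exp(logit_j)) - logit_k: the negative log of
    # the masked-softmax probability of the span picked out by y_preds.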

    loss_t = tf.reduce_mean(xents)

    prediction_probs_t = exp_logits_t / tf.expand_dims(tf.reduce_sum(exp_logits_t, axis=1), 1)

    # Optimizer
    global_step_t = tf.train.create_global_step()
    train_op = optimizer.minimize(loss_t, global_step=global_step_t)

    # Session
    sess = tf.train.MonitoredTrainingSession(
        checkpoint_dir=logdir,
        save_checkpoint_secs=60000,
        save_summaries_steps=50
    )

    # Summaries
    summary_writer = SummaryWriterCache.get(logdir)
    metrics_logger = experiment_logging.TensorboardLogger(writer=summary_writer)
    shutil.copyfile(config_path, logdir + '/config.py')  # save config in logdir

    # Fetch entire dev set (no need to do this inside the eval loop repeatedly)
    dev_feed_dicts = {  # One feed dict for each dataset
        dataset_name: {
            context_t: np.asarray([x['context'] for x in dataset]),
            context_t_length: np.asarray([x['context_len'] for x in dataset]),
            question_t: np.asarray([x['question'] for x in dataset]),
            question_t_length: np.asarray([x['question_len'] for x in dataset]),
            label_t: np.asarray([x['label'] for x in dataset]),
            span_mask_t: np.asarray([x['span_mask'] for x in dataset]),
        } for dataset_name, dataset in dev_data.items()
    }

    # Train-Eval loop
    epoch_indices = np.random.permutation(np.arange(len(all_training_data)))
    while True:
        train_indices = epoch_indices[:train_batch_size]
        if len(epoch_indices) < train_batch_size:
            epoch_indices = np.random.permutation(np.arange(len(all_training_data)))

        train_batch = all_training_data[train_indices]
        train_feed_dict = {
            context_t: np.asarray([x['context'] for x in train_batch]),
            context_t_length: np.asarray([x['context_len'] for x in train_batch]),
            question_t: np.asarray([x['question'] for x in train_batch]),
            question_t_length: np.asarray([x['question_len'] for x in train_batch]),
            label_t: np.asarray([x['label'] for x in train_batch]),
            span_mask_t: np.asarray([x['span_mask'] for x in train_batch]),
            is_training: False,
            # 'out_dropout:0': dropout,
        }
        current_step, train_loss, _xents, _logits_t, _exp_logits_t, _log_sum_exp_logits_t,  *_the_rest = sess.run(
            [global_step_t, loss_t, xents, logits_t, exp_logits_t, log_sum_exp_logits_t] + the_rest + [train_op],
            feed_dict=train_feed_dict
        )

        if eval_every_steps is not None and current_step % eval_every_steps == 0:
            beginning_of_eval_time = time.time()
            logging.info('<large eval>:dev')

            # batch eval each dataset
            outputs_for_each_dataset = {}
            for dataset_name, dataset_feed_dict in dev_feed_dicts.items():
                logging.info(f'Computing dev outputs for {dataset_name}')
                batched_feed_dicts = [
                    {
                        placeholder: eval_data[i: i+dev_batch_size]
                        for placeholder, eval_data in dataset_feed_dict.items()
                    }
                    for i in range(0, len(dev_data[dataset_name]), dev_batch_size)
                ]

                for d in batched_feed_dicts:
                    d.update({is_training: False})

                dataset_model_output = None
                batched_model_outputs = [
                    sess.run(
                        {
                            'prediction_probs_t': prediction_probs_t,
                            'label_t': label_t,
                            'loss_per_example_t': xents
                        },
                        feed_dict=batch_feed_dict
                    ) for batch_feed_dict in tqdm(batched_feed_dicts)
                ]

                dataset_model_output = {
                    tensor_name: np.concatenate([output[tensor_name] for output in batched_model_outputs])
                    for tensor_name in batched_model_outputs[0].keys()
                }

                outputs_for_each_dataset[dataset_name] = dataset_model_output

            # much nicer, non batched version of evaluating
            # outputs_for_each_dataset = {
            #     dataset_name: sess.run(
            #         {
            #             'prediction_probs_t': prediction_probs_t,
            #             'label_t': label_t,
            #             'loss_per_example_t': loss_per_example_t
            #         },
            #         feed_dict=dataset_feed_dict
            #     ) for dataset_name, dataset_feed_dict in dev_feed_dicts.items()
            # }

            # build a combined dataset
            output_names = outputs_for_each_dataset[list(outputs_for_each_dataset.keys())[0]].keys()  # HACK

            all_dev_outputs = {
                output_name: np.concatenate([
                    outputs_for_each_dataset[dataset_name][output_name] for dataset_name in outputs_for_each_dataset
                ]) for output_name in output_names
            }
            outputs_for_each_dataset['combined'] = all_dev_outputs

            for dataset_name, dev_model_outputs in outputs_for_each_dataset.items():
                metrics_logger.log_scalar(
                    f'loss/{dataset_name}',
                    dev_model_outputs['loss_per_example_t'].mean(),
                    current_step
                )

                dev_probs = dev_model_outputs['prediction_probs_t']
                dev_labels = dev_model_outputs['label_t']

                # predicted_labels = (dev_probs > 0.5).astype(int)
                predicted_labels = (dev_probs.max(axis=1, keepdims=True) == dev_probs).astype(int)

                for metric_name, metric_fn in basic_metrics.items():
                    score = metric_fn(
                        y_true=np.ndarray.flatten(dev_labels),
                        y_pred=np.ndarray.flatten(predicted_labels),
                        average=None
                    )

                    for i, val in enumerate(score):
                        metrics_logger.log_scalar(
                            f'{metric_name}/{dataset_name}/label_{i}',
                            val,
                            current_step
                        )

                acc = accuracy_score(
                    y_true=np.ndarray.flatten(dev_labels),
                    y_pred=np.ndarray.flatten(predicted_labels),
                )

                metrics_logger.log_scalar(
                    f'accuracy/{dataset_name}',
                    acc,
                    current_step
                )

                if dataset_name == 'combined':  # only want per-dataset examples
                    continue

                context_dev = [x['context_raw'] for x in dev_data[dataset_name]]
                question_dev = [x['question_raw'] for x in dev_data[dataset_name]]

                # per-example exact-match mask (result unused below)
                # np.all((dev_labels == predicted_labels), axis=1)

                to_pick_correct = experiment_logging.select_n_classified(
                    ground_truth=dev_labels,
                    predicted=predicted_labels,
                    correct=True,
                    n=2
                )

                to_pick_wrong = experiment_logging.select_n_classified(
                    ground_truth=dev_labels,
                    predicted=predicted_labels,
                    correct=False,
                    n=2
                )

                prob_dist = np.argmax(dev_probs, axis=1)

                span_counts = Counter(prob_dist)
                sorted_span_counts = sorted(span_counts.items())
                span_pos_counts = {position2span[idx]: count for idx, count in sorted_span_counts}

                print('DEV predicted span distribution')
                print(span_pos_counts)

                # TODO: repeated code, move to methods? + the following code cannot handle cases where some spans are
                # correct and others aren't (it will just show them as being all wrong).

                if to_pick_correct:
                    correct_spans = [
                        [position2span[i] for i, x in enumerate(predicted_labels[p]) if x == 1]
                        for p in to_pick_correct
                    ]
                    correct_contexts = [context_dev[p] for p in to_pick_correct]
                    correct_questions = [question_dev[p] for p in to_pick_correct]

                    for s, c, q in zip(correct_spans, correct_contexts, correct_questions):
                        prompt = ' '.join(q)
                        experiment_logging.print_spans(c, s, prompt)

                if to_pick_wrong:
                    wrong_spans = [
                        [position2span[i] for i, x in enumerate(predicted_labels[p]) if x == 1]
                        for p in to_pick_wrong
                    ]
                    wrong_contexts = [context_dev[p] for p in to_pick_wrong]
                    wrong_questions = [question_dev[p] for p in to_pick_wrong]

                    for s, c, q in zip(wrong_spans, wrong_contexts, wrong_questions):
                        prompt = ' '.join(q)
                        experiment_logging.print_spans(
                            tokens=c,
                            spans=s,
                            prompt=prompt,
                            span_color='\x1b[6;30;41m',
                            prompt_color='\33[1m\33[31m'
                        )

            logging.info(f'evaluation took {time.time() - beginning_of_eval_time:.2f} seconds')
Example #30
    def work(self):
        hooks = [self.ppo.sync_replicas_hook]
        sess = tf.train.MonitoredTrainingSession(master=self.server.target,
                                                 is_chief=(self.wid == 0),
                                                 checkpoint_dir=SUMMARY_DIR,
                                                 save_summaries_steps=None,
                                                 save_summaries_secs=None,
                                                 hooks=hooks)
        if self.wid == 0:
            writer = SummaryWriterCache.get(SUMMARY_DIR)
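            # Only the chief worker (wid == 0) logs summaries; the cached
            # writer shares SUMMARY_DIR with the checkpoints saved above.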

        t, episode, terminal = 0, 0, False
        buffer_s, buffer_a, buffer_r, buffer_v, buffer_terminal = [], [], [], [], []
        rolling_r = RunningStats()

        while not sess.should_stop() and not (episode > EP_MAX
                                              and self.wid == 0):

            s = self.env.reset()
            ep_r, ep_t, ep_a = 0, 0, []

            while True:
                a, v = self.ppo.evaluate_state(s, sess)

                # Update ppo
                if t == BATCH:  # or (terminal and t < BATCH):
                    # Normalise rewards
                    rewards = np.array(buffer_r)
                    rolling_r.update(rewards)
                    rewards = np.clip(rewards / rolling_r.std, -10, 10)

                    v_final = [
                        v * (1 - terminal)
                    ]  # v = 0 if terminal, otherwise use the predicted v
                    values = np.array(buffer_v + v_final)
                    terminals = np.array(buffer_terminal + [terminal])

                    # Generalized Advantage Estimation - https://arxiv.org/abs/1506.02438
                    delta = rewards + GAMMA * values[1:] * (
                        1 - terminals[1:]) - values[:-1]
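                    # delta_t = r_t + GAMMA * V_{t+1} * (1 - terminal_{t+1}) - V_t;
                    # discount() is assumed to roll these deltas up into the GAE
                    # estimate A_t = delta_t + GAMMA * LAMBDA * (1 - terminal) * A_{t+1}.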
                    advantage = discount(delta, GAMMA * LAMBDA, terminals)
                    returns = advantage + np.array(buffer_v)
                    advantage = (advantage - advantage.mean()) / np.maximum(
                        advantage.std(), 1e-6)

                    bs, ba, br, badv = np.reshape(buffer_s, (t,) + self.ppo.s_dim), np.vstack(buffer_a), \
                                       np.vstack(returns), np.vstack(advantage)

                    graph_summary = self.ppo.update(bs, ba, br, badv, sess)
                    buffer_s, buffer_a, buffer_r, buffer_v, buffer_terminal = [], [], [], [], []
                    t = 0

                buffer_s.append(s)
                buffer_a.append(a)
                buffer_v.append(v)
                buffer_terminal.append(terminal)
                ep_a.append(a)

                if not self.ppo.discrete:
                    a = np.clip(a, self.env.action_space.low,
                                self.env.action_space.high)
                s, r, terminal, _ = self.env.step(a)
                buffer_r.append(r)

                ep_r += r
                ep_t += 1
                t += 1

                if terminal:
                    # End of episode summary
                    print('Worker_%i' % self.wid, '| Episode: %i' % episode,
                          "| Reward: %.2f" % ep_r, '| Steps: %i' % ep_t)

                    if self.wid == 0:
                        worker_summary = tf.Summary()
                        worker_summary.value.add(tag="Reward",
                                                 simple_value=ep_r)

                        # Create Action histograms for each dimension
                        actions = np.array(ep_a)
                        if self.ppo.discrete:
                            add_histogram(writer,
                                          "Action",
                                          actions,
                                          episode,
                                          bins=self.ppo.a_dim)
                        else:
                            for a in range(self.ppo.a_dim):
                                add_histogram(writer, "Action/Dim" + str(a),
                                              actions[:, a], episode)

                        try:
                            writer.add_summary(graph_summary, episode)
                        except NameError:
                            pass
                        writer.add_summary(worker_summary, episode)
                        writer.flush()

                    episode += 1
                    break

        self.env.close()
        print("Worker_%i finished" % self.wid)
Example #31
        # Separate the model-training subgraph in TensorBoard for better visualization.
        # apply_gradients increments the global step shared between workers.
        with tf.name_scope('Model_Training'):
            global_step = tf.train.get_or_create_global_step()
            optimizer = tf.train.GradientDescentOptimizer(0.01)
            grads_and_vars = optimizer.compute_gradients(loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                    global_step=global_step)

        # compute the accuracy (used to plot test_accuracy) and set up the summary writer for TensorBoard
        correct_pred = tf.equal(tf.argmax(y, 1), tf.argmax(y_pred, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.summary.scalar('Accuracy', accuracy)

        summary_var = tf.summary.merge_all()
        writer = SummaryWriterCache.get(log_dir)


        # create a monitored training session object
        config = tf.ConfigProto(device_filters=['/job:ps',
                                '/job:worker/task:%d'
                                % FLAGS.task_index])
        mts = tf.train.MonitoredTrainingSession(master=server.target,
                is_chief=FLAGS.task_index == 0, config=config)

        iterations = 0
        with mts as sess:
            # run until the test loss drops below converging_loss (the desired loss)
            while True:
                (data_batch, label_batch) = \
                    mnist.train.next_batch(FLAGS.batch_size)