def __init__(self, save_steps=100, output_dir=None, summary_writer=None,
             scaffold=None, summary_op=None):
  """Initializes a `SummarySaver` monitor.

  Args:
    save_steps: `int`, save summaries every N steps. See `EveryN`.
    output_dir: `string`, the directory to save the summaries to. Only used
      if no `summary_writer` is supplied.
    summary_writer: `SummaryWriter`. If `None` and an `output_dir` was
      passed, one will be created accordingly.
    scaffold: `Scaffold` to get summary_op if it's not provided.
    summary_op: `Tensor` of type `string`. A serialized `Summary` protocol
      buffer, as output by TF summary methods like `scalar_summary` or
      `merge_all_summaries`.
  """
  # TODO(ipolosukhin): Implement every N seconds.
  self._summary_op = summary_op
  self._summary_writer = summary_writer
  if summary_writer is None and output_dir:
    self._summary_writer = SummaryWriterCache.get(output_dir)
  self._scaffold = scaffold
  self._save_steps = save_steps
def __init__(self, checkpoint_dir, save_secs=None, save_steps=None,
             saver=None, checkpoint_basename="model.ckpt", scaffold=None):
  """Initialize CheckpointSaverHook monitor.

  Args:
    checkpoint_dir: `str`, base directory for the checkpoint files.
    save_secs: `int`, save every N secs.
    save_steps: `int`, save every N steps.
    saver: `Saver` object, used for saving.
    checkpoint_basename: `str`, base name for the checkpoint files.
    scaffold: `Scaffold`, use to get saver object.

  Raises:
    ValueError: Exactly one of `save_steps` or `save_secs` should be set.
  """
  logging.info("Create CheckpointSaverHook.")
  self._saver = saver
  self._checkpoint_dir = checkpoint_dir
  self._summary_writer = SummaryWriterCache.get(checkpoint_dir)
  self._save_path = os.path.join(checkpoint_dir, checkpoint_basename)
  self._scaffold = scaffold
  self._save_secs = save_secs
  self._save_steps = save_steps
  self._last_saved_time = None
  self._last_saved_step = None

  if save_steps is None and save_secs is None:
    raise ValueError("Either save_steps or save_secs should be provided")
  if (save_steps is not None) and (save_secs is not None):
    raise ValueError("Can not provide both save_steps and save_secs.")
def begin(self):
    # These calls only work because the SessionRunHook API guarantees this
    # will get called within a graph context containing our model graph.
    self.summary_writer = SummaryWriterCache.get(self.working_dir)
    self.weight_tensors = tf.trainable_variables()
    self.global_step = tf.train.get_or_create_global_step()
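# --- Hedged companion sketch (not from the original sources): a hook whose
# begin() caches the writer and weight tensors, as above, usually logs something
# about those weights later on. The helper below writes each weight's L2 norm as
# a scalar summary; the function name and tag prefix are illustrative assumptions.
import numpy as np
import tensorflow as tf


def log_weight_norms(summary_writer, names, weight_values, step):
    """Writes one `weight_norm/<name>` scalar per weight array."""
    for name, value in zip(names, weight_values):
        summary = tf.Summary(value=[tf.Summary.Value(
            tag="weight_norm/" + name,
            simple_value=float(np.linalg.norm(value)))])
        summary_writer.add_summary(summary, step)
    summary_writer.flush()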
def __init__(self, save_steps=100, save_secs=None, output_dir=None,
             summary_writer=None, scaffold=None, summary_op=None):
  """Initializes a `SummarySaver` monitor.

  Args:
    save_steps: `int`, save summaries every N steps. Exactly one of
      `save_secs` and `save_steps` should be set.
    save_secs: `int`, save summaries every N seconds.
    output_dir: `string`, the directory to save the summaries to. Only used
      if no `summary_writer` is supplied.
    summary_writer: `SummaryWriter`. If `None` and an `output_dir` was
      passed, one will be created accordingly.
    scaffold: `Scaffold` to get summary_op if it's not provided.
    summary_op: `Tensor` of type `string`. A serialized `Summary` protocol
      buffer, as output by TF summary methods like `scalar_summary` or
      `merge_all_summaries`.
  """
  self._summary_op = summary_op
  self._summary_writer = summary_writer
  if summary_writer is None and output_dir:
    self._summary_writer = SummaryWriterCache.get(output_dir)
  self._scaffold = scaffold
  self._timer = _SecondOrStepTimer(every_secs=save_secs,
                                   every_steps=save_steps)
def __init__(self,
             save_steps=None,
             save_secs=None,
             output_dir="",
             show_dataflow=True,
             show_memory=False):
  """Initializes a hook that takes periodic profiling snapshots.

  The `options.run_metadata` argument of `tf.Session.run` is used to collect
  metadata about execution. This hook sets the metadata and dumps it in Chrome
  Trace format.

  Args:
    save_steps: `int`, save profile traces every N steps. Exactly one of
      `save_secs` and `save_steps` should be set.
    save_secs: `int` or `float`, save profile traces every N seconds.
    output_dir: `string`, the directory to save the profile traces to.
      Defaults to the current directory.
    show_dataflow: `bool`, if True, add flow events to the trace connecting
      producers and consumers of tensors.
    show_memory: `bool`, if True, add object snapshot events to the trace
      showing the sizes and lifetimes of tensors.
  """
  self._output_file = os.path.join(output_dir, "timeline-{}.json")
  self._file_writer = SummaryWriterCache.get(output_dir)
  self._show_dataflow = show_dataflow
  self._show_memory = show_memory
  self._timer = SecondOrStepTimer(
      every_secs=save_secs, every_steps=save_steps)
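# --- Hedged usage sketch (not from the original sources): the public
# tf.train.ProfilerHook has the same constructor arguments as the hook above.
# The stand-in graph below is an assumption so the snippet runs on its own.
import tensorflow as tf

global_step = tf.train.get_or_create_global_step()
train_op = tf.assign_add(global_step, 1)  # stand-in train op for the sketch

profiler_hook = tf.train.ProfilerHook(
    save_steps=50,                 # dump a Chrome-trace timeline every 50 steps
    output_dir="/tmp/profiles",    # timeline-<step>.json files land here
    show_dataflow=True,
    show_memory=False)

with tf.train.SingularMonitoredSession(hooks=[profiler_hook]) as sess:
    for _ in range(200):
        sess.run(train_op)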
def __init__(self,
             checkpoint_dir,
             save_secs=None,
             save_steps=None,
             saver=None,
             checkpoint_basename="model.ckpt",
             scaffold=None):
  """Initialize CheckpointSaverHook monitor.

  Args:
    checkpoint_dir: `str`, base directory for the checkpoint files.
    save_secs: `int`, save every N secs.
    save_steps: `int`, save every N steps.
    saver: `Saver` object, used for saving.
    checkpoint_basename: `str`, base name for the checkpoint files.
    scaffold: `Scaffold`, use to get saver object.

  Raises:
    ValueError: One of `save_steps` or `save_secs` should be set.
    ValueError: Exactly one of saver or scaffold should be set.
  """
  logging.info("Create CheckpointSaverHook.")
  if ((saver is None and scaffold is None) or
      (saver is not None and scaffold is not None)):
    raise ValueError("Exactly one of saver or scaffold must be provided.")
  self._saver = saver
  self._checkpoint_dir = checkpoint_dir
  self._summary_writer = SummaryWriterCache.get(checkpoint_dir)
  self._save_path = os.path.join(checkpoint_dir, checkpoint_basename)
  self._scaffold = scaffold
  self._timer = _SecondOrStepTimer(every_secs=save_secs,
                                   every_steps=save_steps)
def after_run(self, run_context, run_values):
  del run_context  # Unused by feature importance summary saver hook.

  # Read result tensors.
  global_step = run_values.results["global_step"]
  feature_names = run_values.results["feature_names"]
  feature_usage_counts = run_values.results["feature_usage_counts"]
  feature_gains = run_values.results["feature_gains"]

  # Ensure summaries are logged at desired frequency.
  if (self._last_triggered_step is not None and
      global_step < self._last_triggered_step + self._every_n_steps):
    return

  # Validate tensors.
  if (len(feature_names) != len(feature_usage_counts) or
      len(feature_names) != len(feature_gains)):
    raise RuntimeError(
        "Feature names and importance measures have inconsistent lengths.")

  # Compute total usage.
  total_usage_count = 0.0
  for usage_count in feature_usage_counts:
    total_usage_count += usage_count
  usage_count_norm = 1.0 / total_usage_count if total_usage_count else 1.0

  # Compute total gain.
  total_gain = 0.0
  for gain in feature_gains:
    total_gain += gain
  gain_norm = 1.0 / total_gain if total_gain else 1.0

  # Output summary for each feature.
  self._last_triggered_step = global_step
  for (name, usage_count, gain) in zip(feature_names, feature_usage_counts,
                                       feature_gains):
    output_dir = os.path.join(self._model_dir, name.decode("utf-8"))
    summary_writer = SummaryWriterCache.get(output_dir)
    usage_count_summary = Summary(value=[
        Summary.Value(
            tag="feature_importance/usage_counts", simple_value=usage_count)
    ])
    usage_fraction_summary = Summary(value=[
        Summary.Value(
            tag="feature_importance/usage_fraction",
            simple_value=usage_count * usage_count_norm)
    ])
    summary_writer.add_summary(usage_count_summary, global_step)
    summary_writer.add_summary(usage_fraction_summary, global_step)
    gains_summary = Summary(value=[
        Summary.Value(tag="feature_importance/gains", simple_value=gain)
    ])
    gains_fraction_summary = Summary(value=[
        Summary.Value(
            tag="feature_importance/gains_fraction",
            simple_value=gain * gain_norm)
    ])
    summary_writer.add_summary(gains_summary, global_step)
    summary_writer.add_summary(gains_fraction_summary, global_step)
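# --- Hedged alternative sketch (not from the original sources): the manual
# `_last_triggered_step + _every_n_steps` gating above can also be expressed with
# the public tf.train.SecondOrStepTimer. The wrapper class below and its name are
# illustrative assumptions.
import tensorflow as tf


class EveryNStepsGate(object):
    """Returns True at most once every `every_n_steps` global steps."""

    def __init__(self, every_n_steps):
        self._timer = tf.train.SecondOrStepTimer(every_steps=every_n_steps)

    def should_log(self, global_step):
        if self._timer.should_trigger_for_step(global_step):
            self._timer.update_last_triggered_step(global_step)
            return True
        return False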
def begin(self):
    if self._output_dir:
        self._summary_writer = SummaryWriterCache.get(self._output_dir)
    self._next_step = None
    self._global_step = tf.train.get_global_step()
    if self._global_step is None:
        raise RuntimeError('Global step must be created for VarVisHook.')
def begin(self):
  if self._summary_writer is None and self._output_dir:
    self._summary_writer = SummaryWriterCache.get(self._output_dir)
  self._next_step = None
  self._global_step_tensor = training_util.get_global_step()
  if self._global_step_tensor is None:
    raise RuntimeError(
        "Global step should be created to use SummarySaverHook.")
def begin(self):
  if self._summary_writer is None and self._output_dir:
    self._summary_writer = SummaryWriterCache.get(self._output_dir)
  self._next_episode = None
  self._current_episode = None
  self._global_episode_tensor = get_global_episode()
  if self._global_episode_tensor is None:
    raise RuntimeError(
        "Global episode should be created to use EpisodeSummarySaverHook.")
def begin(self):
  if self._summary_writer is None and self._output_dir:
    self._summary_writer = SummaryWriterCache.get(self._output_dir)
  self._global_step_tensor = training_util.get_global_step()
  if self._global_step_tensor is None:
    raise RuntimeError(
        "Global step should be created to use StepCounterHook.")
  self._summary_tag = self._global_step_tensor.op.name + "/sec"
def begin(self):
  if self._summary_writer is None and self._output_dir:
    self._summary_writer = SummaryWriterCache.get(self._output_dir)
  self._next_step = None
  self._global_step_tensor = training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
  if self._global_step_tensor is None:
    raise RuntimeError(
        "Global step should be created to use SummarySaverHook.")
def begin(self):
  self._summary_writer = SummaryWriterCache.get(self._checkpoint_dir)
  self._global_step_tensor = training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
  if self._global_step_tensor is None:
    raise RuntimeError(
        "Global step should be created to use CheckpointSaverHook.")
  for l in self._listeners:
    l.begin()
def begin(self):
  self._summary_writer = SummaryWriterCache.get(self._checkpoint_dir)
  self._global_step_tensor = training_util.get_global_step()
  if self._global_step_tensor is None:
    raise RuntimeError(
        "Global step should be created to use CheckpointSaverHook.")
  for l in self._listeners:
    l.begin()
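# --- Hedged sketch (not from the original sources): a minimal listener matching
# the `self._listeners` loop above. tf.train.CheckpointSaverListener and its
# callback signatures are the real TF 1.x API; the class name and log messages
# are illustrative assumptions.
import tensorflow as tf


class LoggingCheckpointListener(tf.train.CheckpointSaverListener):

    def begin(self):
        tf.logging.info("Checkpoint listener initialized.")

    def after_save(self, session, global_step_value):
        tf.logging.info("Checkpoint written at step %d.", global_step_value)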
def begin(self):
  if self._summary_writer is None and self._output_dir:
    self._summary_writer = SummaryWriterCache.get(self._output_dir)
  self._global_episode_tensor = get_global_episode()
  if self._global_episode_tensor is None:
    raise RuntimeError(
        "Global episode should be created to use EpisodeCounterHook.")
  self._summary_sec_tag = self._global_episode_tensor.op.name + "/sec"
  self._summary_steps_tag = self._global_episode_tensor.op.name + "/steps"
  self._num_steps = 0
def __init__(self, every_n_steps=100, every_n_secs=None, output_dir=None,
             summary_writer=None):
  if (every_n_steps is None) == (every_n_secs is None):
    raise ValueError(
        "exactly one of every_n_steps and every_n_secs should be provided.")
  self._timer = _SecondOrStepTimer(every_steps=every_n_steps,
                                   every_secs=every_n_secs)
  self._summary_writer = summary_writer
  if summary_writer is None and output_dir:
    self._summary_writer = SummaryWriterCache.get(output_dir)
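# --- Hedged usage sketch (not from the original sources): this constructor
# matches the public tf.train.StepCounterHook, which can be attached directly to
# a monitored session. The stand-in graph below is an assumption so the snippet
# runs on its own.
import tensorflow as tf

global_step = tf.train.get_or_create_global_step()
train_op = tf.assign_add(global_step, 1)  # stand-in train op for the sketch

step_counter = tf.train.StepCounterHook(every_n_steps=100,
                                        output_dir="/tmp/train_logs")

with tf.train.SingularMonitoredSession(hooks=[step_counter]) as sess:
    for _ in range(500):
        sess.run(train_op)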
def __init__(self,
             checkpoint_dir,
             save_secs=None,
             save_steps=None,
             saver=None,
             checkpoint_basename="model.ckpt",
             scaffold=None,
             listeners=None):
  """Initialize CheckpointSaverHook monitor.

  Args:
    checkpoint_dir: `str`, base directory for the checkpoint files.
    save_secs: `int`, save every N secs.
    save_steps: `int`, save every N steps.
    saver: `Saver` object, used for saving.
    checkpoint_basename: `str`, base name for the checkpoint files.
    scaffold: `Scaffold`, use to get saver object.
    listeners: List of `CheckpointSaverListener` subclass instances. Used for
      callbacks that run immediately after the corresponding
      CheckpointSaverHook callbacks, only in steps where the
      CheckpointSaverHook was triggered.

  Raises:
    ValueError: One of `save_steps` or `save_secs` should be set.
    ValueError: Exactly one of saver or scaffold should be set.
  """
  logging.info("Create CheckpointSaverHook.")
  if ((saver is None and scaffold is None) or
      (saver is not None and scaffold is not None)):
    raise ValueError("Exactly one of saver or scaffold must be provided.")
  self._saver = saver
  self._checkpoint_dir = checkpoint_dir
  self._summary_writer = SummaryWriterCache.get(checkpoint_dir)
  self._save_path = os.path.join(checkpoint_dir, checkpoint_basename)
  self._scaffold = scaffold
  self._timer = _SecondOrStepTimer(every_secs=save_secs,
                                   every_steps=save_steps)
  self._listeners = listeners or []
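# --- Hedged usage sketch (not from the original sources): wiring a hook with the
# constructor above into a session, passing a scaffold (exactly one of saver or
# scaffold) plus one listener. The stand-in graph, directory, and the listener
# class from the earlier sketch are assumptions.
import tensorflow as tf

global_step = tf.train.get_or_create_global_step()
train_op = tf.assign_add(global_step, 1)  # stand-in train op for the sketch

saver_hook = tf.train.CheckpointSaverHook(
    checkpoint_dir="/tmp/model_dir",
    save_steps=1000,
    scaffold=tf.train.Scaffold(),
    listeners=[LoggingCheckpointListener()])

with tf.train.SingularMonitoredSession(hooks=[saver_hook]) as sess:
    for _ in range(3000):
        sess.run(train_op)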
def __init__(self,
             save_steps=None,
             save_secs=None,
             output_dir=None,
             summary_writer=None,
             scaffold=None,
             summary_op=None):
  """Initializes a `SummarySaver` monitor.

  Args:
    save_steps: `int`, save summaries every N steps. Exactly one of
      `save_secs` and `save_steps` should be set.
    save_secs: `int`, save summaries every N seconds.
    output_dir: `string`, the directory to save the summaries to. Only used
      if no `summary_writer` is supplied.
    summary_writer: `SummaryWriter`. If `None` and an `output_dir` was
      passed, one will be created accordingly.
    scaffold: `Scaffold` to get summary_op if it's not provided.
    summary_op: `Tensor` of type `string` containing the serialized `Summary`
      protocol buffer, or a list of such `Tensor`s. These are most likely
      produced by TF summary methods like `tf.summary.scalar` or
      `tf.summary.merge_all`. A single tensor can be passed directly; multiple
      tensors must be passed as a list.

  Raises:
    ValueError: Exactly one of scaffold or summary_op should be set.
  """
  if ((scaffold is None and summary_op is None) or
      (scaffold is not None and summary_op is not None)):
    raise ValueError(
        "Exactly one of scaffold or summary_op must be provided.")
  self._summary_op = summary_op
  self._summary_writer = summary_writer
  if summary_writer is None and output_dir:
    self._summary_writer = SummaryWriterCache.get(output_dir)
  self._scaffold = scaffold
  self._timer = _SecondOrStepTimer(every_secs=save_secs,
                                   every_steps=save_steps)
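# --- Hedged usage sketch (not from the original sources): the public
# tf.train.SummarySaverHook shares this constructor; below it saves a merged
# summary every 100 steps. The stand-in loss and graph are assumptions so the
# snippet runs on its own.
import tensorflow as tf

global_step = tf.train.get_or_create_global_step()
train_op = tf.assign_add(global_step, 1)   # stand-in train op for the sketch
loss = tf.constant(0.0)                    # stand-in loss for the sketch
tf.summary.scalar("loss", loss)

summary_hook = tf.train.SummarySaverHook(
    save_steps=100,
    output_dir="/tmp/summaries",
    summary_op=tf.summary.merge_all())     # exactly one of scaffold / summary_op

with tf.train.SingularMonitoredSession(hooks=[summary_hook]) as sess:
    for _ in range(500):
        sess.run(train_op)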
stepsSaver = StepsSaver(FLAGS.our_log_dir)
reward_vector2scalar = FuncReward(gamma)

# Configure sess
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with __agent.create_session(
        config=config,
        save_dir=FLAGS.tf_log_dir,
        save_checkpoint_secs=FLAGS.save_checkpoint_secs) as sess, \
        AsynchronousAgent(
            agent=__agent, method='rate', rate=update_rate) as _agent:
    agent = SkippingAgent(
        # n_skip_vec=(2, 6, 6),
        agent=_agent,
        n_skip=n_skip,
        specific_act=noop)
    summary_writer = SummaryWriterCache.get(FLAGS.tf_log_dir)
    # set vars
    sess.run(op_set_lr, feed_dict={lr_in: learning_rate})
    print "Using learning rate {}".format(sess.run(lr))
    n_ep = 0
    n_total_steps = 0
    # GoGoGo
    for _ in range(1000):
        cum_reward = 0.0
        n_ep_steps = 0
        state = env.reset()
        while True:
            action = agent.act(state)
            if action != 3:
                print_qvals(n_ep_steps, __agent, state, action,
                            AGENT_ACTIONS)
stepsSaver = StepsSaver(our_log_dir)
reward_vector2scalar = FuncReward(gamma)

# Configure sess
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = gpu_mem_fraction
with __agent.create_session(
        config=config,
        save_dir=tf_log_dir,
        save_checkpoint_secs=save_checkpoint_secs) as sess, \
        AsynchronousAgent(
            agent=__agent, method='ratio', ratio=update_ratio) as _agent:
    agent = SkippingAgent(
        # n_skip_vec=(2, 6, 6),
        agent=_agent,
        n_skip=n_skip,
        specific_act=noop)
    summary_writer = SummaryWriterCache.get(tf_log_dir)
    # set vars
    sess.run(op_set_lr, feed_dict={lr_in: learning_rate})
    print "Using learning rate {}".format(sess.run(lr))
    n_ep = 0
    n_total_steps = start_step
    # GoGoGo
    while n_total_steps <= 2.5e5:
        cum_reward = 0.0
        n_ep_steps = 0
        state = env.reset()
        while True:
            action = agent.act(state)
            if action != 3:
                print_qvals(n_ep_steps, __agent, state, action,
                            AGENT_ACTIONS)
else:
    for var in tf.global_variables():
        print "var_name: ", var.name
        if 'Adam' in var.name or 'optimizers/beta1_power' in var.name \
                or 'optimizers/beta2_power' in var.name \
                or 'q_logits' in var.name \
                or var.name == 'global_step:0':
            pass
        else:
            restore_var_list.append(var)

try:
    with agent.create_session(config=config,
                              save_dir=FLAGS.logdir,
                              save_checkpoint_secs=3600,
                              restore_var_list=restore_var_list) as sess:
        summary_writer = SummaryWriterCache.get(FLAGS.logdir)
        all_vars = tf.global_variables()
        with open(FLAGS.logdir + "/readme.txt", "w") as f:
            f.write("readme: {}\n".format(FLAGS.readme))
            f.write("logdir: {}\n".format(FLAGS.logdir))
            f.write("savedir: {}\n".format(FLAGS.savedir))
            f.write("restore var names: \n")
            for var_name in restore_var_list:
                f.write("{}\n".format(var_name))
            f.write("gpu_fraction: {}\n".format(FLAGS.gpu_fraction))
            f.write("discount_factor: {}\n".format(FLAGS.discount_factor))
            f.write("batch_size: {}\n".format(FLAGS.batch_size))
            f.write("ac learning rate: {}\n".format(FLAGS.lr))
            f.write("is_learn_q: {} \n".format(FLAGS.is_learn_q))
            f.write("is_fine_tune: {} \n".format(FLAGS.is_fine_tune))
def main_train(args, files, tf_config): assert args.logdir != '', 'logdir cannot be empty' logdir = os.path.join(args.logdir, 'tf_output') if os.path.isdir(logdir): do_not_delete = True if args.ngpus > 1: if hvd.rank() == 0: if args.force_continue: do_not_delete = True else: do_not_delete = False else: do_not_delete = True elif HEADLESS: if args.force_continue: do_not_delete = True else: raise ValueError('{} exists'.format(logdir)) else: while True: try: key = input( '{} \n do you want to continue?'.format(logdir)) except NameError: key = 'y' if key == 'y': break elif key == 'n': do_not_delete = False break else: print('invalid key') if not do_not_delete: print('******* Deleting {} *******'.format(logdir)) os.system('rm -r {}'.format(logdir)) else: print('continuing') elif args.ngpus == 1 or hvd.rank() == 0: os.makedirs(logdir) print('logdir is {}'.format(logdir)) tf_output, pc_reader = build_tf_ops( args=args, data_dict=None, files=files, ) train_op, summary_op, tf_data_dict, logger_dict, tf_step = tf_output summary_hook = tf.train.SummarySaverHook( summary_op=summary_op, output_dir=logdir, save_steps=args.save_steps, ) logging_hook = tf.train.LoggingTensorHook( tensors=logger_dict, every_n_iter=args.log_steps, ) hooks = [] if args.ngpus > 1: hooks.append(hvd.BroadcastGlobalVariablesHook(0)) if hvd.rank() == 0: checkpoint_dir = logdir save_checkpoint_secs = 300 hooks += [logging_hook, summary_hook] else: checkpoint_dir = None save_checkpoint_secs = 0 hooks += [logging_hook] else: hooks = [logging_hook, summary_hook] checkpoint_dir = logdir save_checkpoint_secs = 300 if args.init_checkpoint_folder != '': checkpoint = tf.train.latest_checkpoint(args.init_checkpoint_folder) tf.train.init_from_checkpoint(checkpoint, {'/': '/'}) yaml_path = os.path.join(args.logdir, 'args.yaml') with open(yaml_path, 'w') as yaml_file: yaml.dump(args, yaml_file) with tf.train.MonitoredTrainingSession( checkpoint_dir=checkpoint_dir, hooks=hooks, save_summaries_secs=0, save_checkpoint_secs=save_checkpoint_secs, config=tf_config, ) as mon_sess: start_time = time.time() print(time.time() - start_time) writer = SummaryWriterCache.get(logdir) while not mon_sess.should_stop(): # print('hvd rank = {}, current_index = {}, nfiles = {}'.format(current_index, hvd.rank(), len(my_files))) tensor_list = [tf_step, tf_data_dict] if args.training_splits == 'train': tensor_list += [train_op] + tensor_list # _, step, data_dict = mon_sess.run(tensor_list) mon_sess.run(tensor_list)
def train(fps, args): with tf.name_scope('loader'): x, cond_text, _ = loader.get_batch(fps, args.train_batch_size, _WINDOW_LEN, args.data_first_window, conditionals=True, name='batch') wrong_audio = loader.get_batch(fps, args.train_batch_size, _WINDOW_LEN, args.data_first_window, conditionals=False, name='wrong_batch') # wrong_cond_text, wrong_cond_text_embed = loader.get_batch(fps, args.train_batch_size, _WINDOW_LEN, args.data_first_window, wavs=False, conditionals=True, name='batch') # Make z vector z = tf.random_normal([args.train_batch_size, _D_Z]) embed = hub.Module('https://tfhub.dev/google/elmo/2', trainable=False, name='embed') cond_text_embed = embed(cond_text) # Add conditioning input to the model args.wavegan_g_kwargs['context_embedding'] = cond_text_embed args.wavegan_d_kwargs['context_embedding'] = args.wavegan_g_kwargs[ 'context_embedding'] lod = tf.placeholder(tf.float32, shape=[]) with tf.variable_scope('G'): # Make generator G_z, c_kl_loss = WaveGANGenerator(z, lod, train=True, **args.wavegan_g_kwargs) if args.wavegan_genr_pp: with tf.variable_scope('pp_filt'): G_z = tf.layers.conv1d(G_z, 1, args.wavegan_genr_pp_len, use_bias=False, padding='same') # Summarize G_z_rms = tf.sqrt(tf.reduce_mean(tf.square(G_z[:, :, 0]), axis=1)) x_rms = tf.sqrt(tf.reduce_mean(tf.square(x[:, :, 0]), axis=1)) x_rms_lod_4 = tf.sqrt( tf.reduce_mean(tf.square(avg_downsample(x)[:, :, 0]), axis=1)) x_rms_lod_3 = tf.sqrt( tf.reduce_mean(tf.square(avg_downsample(avg_downsample(x))[:, :, 0]), axis=1)) x_rms_lod_2 = tf.sqrt( tf.reduce_mean(tf.square( avg_downsample(avg_downsample(avg_downsample(x)))[:, :, 0]), axis=1)) x_rms_lod_1 = tf.sqrt( tf.reduce_mean(tf.square( avg_downsample(avg_downsample(avg_downsample( avg_downsample(x))))[:, :, 0]), axis=1)) x_rms_lod_0 = tf.sqrt( tf.reduce_mean(tf.square( avg_downsample( avg_downsample( avg_downsample(avg_downsample(avg_downsample(x)))))[:, :, 0]), axis=1)) tf.summary.histogram('x_rms_batch', x_rms) tf.summary.histogram('G_z_rms_batch', G_z_rms) tf.summary.scalar('x_rms', tf.reduce_mean(x_rms)) tf.summary.scalar('x_rms_lod_4', tf.reduce_mean(x_rms_lod_4)) tf.summary.scalar('x_rms_lod_3', tf.reduce_mean(x_rms_lod_3)) tf.summary.scalar('x_rms_lod_2', tf.reduce_mean(x_rms_lod_2)) tf.summary.scalar('x_rms_lod_1', tf.reduce_mean(x_rms_lod_1)) tf.summary.scalar('x_rms_lod_0', tf.reduce_mean(x_rms_lod_0)) tf.summary.scalar('G_z_rms', tf.reduce_mean(G_z_rms)) tf.summary.audio('x', x, _FS, max_outputs=10) tf.summary.audio('G_z', G_z, _FS, max_outputs=10) tf.summary.text('Conditioning Text', cond_text[:10]) # with tf.variable_scope('G'): # # Make history buffer # history_buffer = HistoryBuffer(_WINDOW_LEN, args.train_batch_size * 100, args.train_batch_size) # # Select half of batch from history buffer # g_from_history, r_from_history, embeds_from_history = history_buffer.get_from_history_buffer() # new_fake_batch = tf.concat([G_z[:tf.shape(G_z)[0] - tf.shape(g_from_history)[0]], g_from_history], 0) # Use tf.shape to handle case when g_from_history is empty # new_cond_embeds = tf.concat([cond_text_embed[:tf.shape(cond_text_embed)[0] - tf.shape(embeds_from_history)[0]], embeds_from_history], 0) # new_real_batch = tf.concat([x[:tf.shape(x)[0] - tf.shape(r_from_history)[0]], r_from_history], 0) # with tf.control_dependencies([new_fake_batch, new_real_batch, new_cond_embeds]): # with tf.control_dependencies([history_buffer.add_to_history_buffer(G_z, x, cond_text_embed)]): # G_z = tf.identity(new_fake_batch) # x = tf.identity(new_real_batch) # 
args.wavegan_g_kwargs['context_embedding'] = tf.identity(new_cond_embeds) # args.wavegan_d_kwargs['context_embedding'] = args.wavegan_g_kwargs['context_embedding'] # G_z.set_shape([args.train_batch_size, _WINDOW_LEN, 1]) # x.set_shape([args.train_batch_size, _WINDOW_LEN, 1]) G_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='G') # Print G summary print('-' * 80) print('Generator vars') nparams = 0 for v in G_vars: v_shape = v.get_shape().as_list() v_n = reduce(lambda x, y: x * y, v_shape) nparams += v_n print('{} ({}): {}'.format(v.get_shape().as_list(), v_n, v.name)) print('Total params: {} ({:.2f} MB)'.format(nparams, (float(nparams) * 4) / (1024 * 1024))) # Summarize # tf.summary.scalar('history_buffer_size', history_buffer.current_size) # tf.summary.scalar('g_from_history_size', tf.shape(g_from_history)[0]) # tf.summary.scalar('r_from_history_size', tf.shape(r_from_history)[0]) # tf.summary.scalar('embeds_from_history_size', tf.shape(embeds_from_history)[0]) # tf.summary.audio('G_z_history', g_from_history, _FS, max_outputs=10) # tf.summary.audio('x_history', r_from_history, _FS, max_outputs=10) tf.summary.audio('wrong_audio', wrong_audio, _FS, max_outputs=10) tf.summary.scalar('Conditional Resample - KL-Loss', c_kl_loss) # tf.summary.scalar('embed_error_cosine', tf.reduce_sum(tf.multiply(cond_text_embed, expected_embed)) / (tf.norm(cond_text_embed) * tf.norm(expected_embed))) # tf.summary.scalar('embed_error_cosine_wrong', tf.reduce_sum(tf.multiply(wrong_cond_text_embed, expected_embed)) / (tf.norm(wrong_cond_text_embed) * tf.norm(expected_embed))) # Make real discriminator with tf.name_scope('D_x'), tf.variable_scope('D'): D_x = WaveGANDiscriminator(x, lod, **args.wavegan_d_kwargs) D_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='D') # Print D summary print('-' * 80) print('Discriminator vars') nparams = 0 for v in D_vars: v_shape = v.get_shape().as_list() v_n = reduce(lambda x, y: x * y, v_shape) nparams += v_n print('{} ({}): {}'.format(v.get_shape().as_list(), v_n, v.name)) print('Total params: {} ({:.2f} MB)'.format(nparams, (float(nparams) * 4) / (1024 * 1024))) print('-' * 80) # Make fake / wrong discriminator with tf.name_scope('D_G_z'), tf.variable_scope('D', reuse=True): D_G_z = WaveGANDiscriminator(G_z, lod, **args.wavegan_d_kwargs) with tf.name_scope('D_w'), tf.variable_scope('D', reuse=True): D_w = WaveGANDiscriminator(wrong_audio, lod, **args.wavegan_d_kwargs) # Create loss D_clip_weights = None if args.wavegan_loss == 'dcgan': fake = tf.zeros([args.train_batch_size, 1], dtype=tf.float32) real = tf.ones([args.train_batch_size, 1], dtype=tf.float32) # Conditional G Loss G_loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=D_G_z[0], labels=real)) G_loss += c_kl_loss # Unconditional G Loss if args.use_extra_uncond_loss: G_loss += tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=D_G_z[1], labels=real)) G_loss /= 2 # Conditional D Losses D_loss_fake = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=D_G_z[0], labels=fake)) D_loss_wrong = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=D_w[0], labels=fake)) D_loss_real = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=D_x[0], labels=real)) # Unconditional D Losses if args.use_extra_uncond_loss: D_loss_fake_uncond = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=D_G_z[1], labels=fake)) D_loss_wrong_uncond = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=D_w[1], labels=real)) 
D_loss_real_uncond = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=D_x[1], labels=real)) D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) \ + 0.5 * (D_loss_real_uncond + D_loss_wrong_uncond) + D_loss_fake_uncond D_loss /= 2 else: D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) # Warmup Conditional Loss # D_warmup_loss = D_loss_real + D_loss_wrong elif args.wavegan_loss == 'lsgan': # Conditional G Loss G_loss = tf.reduce_mean((D_G_z[0] - 1.)**2) G_loss += c_kl_loss # Unconditional G Loss if args.use_extra_uncond_loss: G_loss += tf.reduce_mean((D_G_z[1] - 1.)**2) G_loss /= 2 # Conditional D Loss D_loss_real = tf.reduce_mean((D_x[0] - 1.)**2) D_loss_wrong = tf.reduce_mean(D_w[0]**2) D_loss_fake = tf.reduce_mean(D_G_z[0]**2) # Unconditional D Loss if args.use_extra_uncond_loss: D_loss_real_uncond = tf.reduce_mean((D_x[1] - 1.)**2) D_loss_wrong_uncond = tf.reduce_mean((D_w[1] - 1.)**2) D_loss_fake_uncond = tf.reduce_mean(D_G_z[1]**2) D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) \ + 0.5 * (D_loss_real_uncond + D_loss_wrong_uncond) + D_loss_fake_uncond D_loss /= 2 else: D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) # Warmup Conditional Loss # D_warmup_loss = D_loss_real + D_loss_wrong elif args.wavegan_loss == 'wgan': # Conditional G Loss G_loss = -tf.reduce_mean(D_G_z[0]) G_loss += c_kl_loss # Unconditional G Loss if args.use_extra_uncond_loss: G_loss += -tf.reduce_mean(D_G_z[1]) G_loss /= 2 # Conditional D Loss D_loss_real = -tf.reduce_mean(D_x[0]) D_loss_wrong = tf.reduce_mean(D_w[0]) D_loss_fake = tf.reduce_mean(D_G_z[0]) # Unconditional D Loss if args.use_extra_uncond_loss: D_loss_real_uncond = -tf.reduce_mean(D_x[1]) D_loss_wrong_uncond = -tf.reduce_mean(D_w[1]) D_loss_fake_uncond = tf.reduce_mean(D_G_z[1]) D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) \ + 0.5 * (D_loss_real_uncond + D_loss_wrong_uncond) + D_loss_fake_uncond D_loss /= 2 else: D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) # Warmup Conditional Loss # D_warmup_loss = D_loss_real + D_loss_wrong with tf.name_scope('D_clip_weights'): clip_ops = [] for var in D_vars: clip_bounds = [-.01, .01] clip_ops.append( tf.assign( var, tf.clip_by_value(var, clip_bounds[0], clip_bounds[1]))) D_clip_weights = tf.group(*clip_ops) elif args.wavegan_loss == 'wgan-gp': # Conditional G Loss G_loss = -tf.reduce_mean(D_G_z[0]) G_loss += c_kl_loss # Unconditional G Loss if args.use_extra_uncond_loss: G_loss += -tf.reduce_mean(D_G_z[1]) G_loss /= 2 # Conditional D Loss D_loss_real = -tf.reduce_mean(D_x[0]) D_loss_wrong = tf.reduce_mean(D_w[0]) D_loss_fake = tf.reduce_mean(D_G_z[0]) # Unconditional D Loss if args.use_extra_uncond_loss: D_loss_real_uncond = -tf.reduce_mean(D_x[1]) D_loss_wrong_uncond = -tf.reduce_mean(D_w[1]) D_loss_fake_uncond = tf.reduce_mean(D_G_z[1]) D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) \ + 0.5 * (D_loss_real_uncond + D_loss_wrong_uncond) + D_loss_fake_uncond D_loss /= 2 else: D_loss = D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) # Warmup Conditional Loss # D_warmup_loss = D_loss_real + D_loss_wrong # Conditional Gradient Penalty alpha = tf.random_uniform(shape=[args.train_batch_size, 1, 1], minval=0., maxval=1.) 
real = x fake = tf.concat([ G_z[:args.train_batch_size // 2], wrong_audio[:args.train_batch_size // 2] ], 0) differences = fake - real interpolates = real + (alpha * differences) with tf.name_scope('D_interp'), tf.variable_scope('D', reuse=True): D_interp = WaveGANDiscriminator( interpolates, lod, **args.wavegan_d_kwargs)[0] # Only want conditional output gradients = tf.gradients(D_interp, [interpolates])[0] slopes = tf.sqrt( tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2])) cond_gradient_penalty = tf.reduce_mean((slopes - 1.)**2.) # Unconditional Gradient Penalty alpha = tf.random_uniform(shape=[args.train_batch_size, 1, 1], minval=0., maxval=1.) real = tf.concat([ x[:args.train_batch_size // 2], wrong_audio[:args.train_batch_size // 2] ], 0) fake = G_z differences = fake - real interpolates = real + (alpha * differences) with tf.name_scope('D_interp'), tf.variable_scope('D', reuse=True): D_interp = WaveGANDiscriminator( interpolates, lod, **args.wavegan_d_kwargs)[1] # Only want unconditional output gradients = tf.gradients(D_interp, [interpolates])[0] slopes = tf.sqrt( tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2])) uncond_gradient_penalty = tf.reduce_mean((slopes - 1.)**2.) # Warmup Gradient Penalty # alpha = tf.random_uniform(shape=[args.train_batch_size, 1, 1], minval=0., maxval=1.) # real = x # fake = wrong_audio # differences = fake - real # interpolates = real + (alpha * differences) # with tf.name_scope('D_interp'), tf.variable_scope('D', reuse=True): # D_interp = WaveGANDiscriminator(interpolates, lod, **args.wavegan_d_kwargs)[0] # Only want conditional output # gradients = tf.gradients(D_interp, [interpolates])[0] # slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2])) # warmup_gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2.) 
gradient_penalty = (cond_gradient_penalty + uncond_gradient_penalty) / 2 LAMBDA = 10 D_loss += LAMBDA * gradient_penalty # D_warmup_loss += LAMBDA * warmup_gradient_penalty else: raise NotImplementedError() tf.summary.scalar('G_loss', G_loss) if (args.wavegan_loss == 'wgan-gp'): tf.summary.scalar('Gradient Penalty', LAMBDA * gradient_penalty) if (args.wavegan_loss == 'wgan' or args.wavegan_loss == 'wgan-gp'): if args.use_extra_uncond_loss: tf.summary.scalar('Critic Score - Real Data - Condition Match', -D_loss_real) tf.summary.scalar('Critic Score - Fake Data - Condition Match', D_loss_fake) tf.summary.scalar('Critic Score - Wrong Data - Condition Match', D_loss_wrong) tf.summary.scalar('Critic Score - Real Data', -D_loss_real_uncond) tf.summary.scalar('Critic Score - Wrong Data', -D_loss_wrong_uncond) tf.summary.scalar('Critic Score - Fake Data', D_loss_fake_uncond) tf.summary.scalar('Wasserstein Distance - No Regularization Term', -((D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) \ + 0.5 * (D_loss_real_uncond + D_loss_wrong_uncond) + D_loss_fake_uncond) / 2)) tf.summary.scalar('Wasserstein Distance - Real-Wrong Only', -(D_loss_real + D_loss_wrong)) tf.summary.scalar('Wasserstein Distance - Real-Fake Only', -((D_loss_real + D_loss_fake \ + D_loss_real_uncond + D_loss_fake_uncond) / 2)) else: tf.summary.scalar('Critic Score - Real Data', -D_loss_real) tf.summary.scalar('Critic Score - Wrong Data', D_loss_wrong) tf.summary.scalar('Critic Score - Fake Data', D_loss_fake) tf.summary.scalar( 'Wasserstein Distance - No Regularization Term', -(D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake))) tf.summary.scalar('Wasserstein Distance - With Regularization Term', -D_loss) else: if args.use_extra_uncond_loss: tf.summary.scalar('D_acc_uncond', 0.5 * ((0.5 * (tf.reduce_mean(tf.sigmoid(D_x[1])) + tf.reduce_mean(tf.sigmoid(D_w[1])))) \ + tf.reduce_mean(1 - tf.sigmoid(D_G_z[1])))) tf.summary.scalar('D_acc', 0.5 * (tf.reduce_mean(tf.sigmoid(D_x[0])) \ + 0.5 * (tf.reduce_mean(1 - tf.sigmoid(D_w[0])) + tf.reduce_mean(1 - tf.sigmoid(D_G_z[0]))))) tf.summary.scalar('D_acc_real_wrong_only', 0.5 * (tf.reduce_mean(tf.sigmoid(D_x[0])) \ + tf.reduce_mean(1 - tf.sigmoid(D_w[0])))) tf.summary.scalar('D_loss_cond_real', D_loss_real) tf.summary.scalar('D_loss_uncond_real', D_loss_real_uncond) tf.summary.scalar('D_loss_cond_wrong', D_loss_wrong) tf.summary.scalar('D_loss_uncond_wrong', D_loss_wrong_uncond) tf.summary.scalar('D_loss_cond_fake', D_loss_fake) tf.summary.scalar('D_loss_uncond_fake', D_loss_fake_uncond) tf.summary.scalar('D_loss_unregularized', (D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake) \ + 0.5 * (D_loss_real_uncond + D_loss_wrong_uncond) + D_loss_fake_uncond) / 2) else: tf.summary.scalar('D_acc', 0.5 * (tf.reduce_mean(tf.sigmoid(D_x[0])) \ + 0.5 * (tf.reduce_mean(1 - tf.sigmoid(D_w[0])) + tf.reduce_mean(1 - tf.sigmoid(D_G_z[0]))))) tf.summary.scalar('D_loss_real', D_loss_real) tf.summary.scalar('D_loss_wrong', D_loss_wrong) tf.summary.scalar('D_loss_fake', D_loss_fake) tf.summary.scalar('D_loss_unregularized', D_loss_real + 0.5 * (D_loss_wrong + D_loss_fake)) tf.summary.scalar('D_loss', D_loss) # Create (recommended) optimizer if args.wavegan_loss == 'dcgan': G_opt = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5) D_opt = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5) elif args.wavegan_loss == 'lsgan': G_opt = tf.train.RMSPropOptimizer(learning_rate=1e-4) D_opt = tf.train.RMSPropOptimizer(learning_rate=1e-4) elif args.wavegan_loss == 'wgan': G_opt = 
tf.train.RMSPropOptimizer(learning_rate=5e-5) D_opt = tf.train.RMSPropOptimizer(learning_rate=5e-5) elif args.wavegan_loss == 'wgan-gp': G_opt = tf.train.AdamOptimizer(learning_rate=4e-4, beta1=0.0, beta2=0.9) D_opt = tf.train.AdamOptimizer(learning_rate=4e-4, beta1=0.0, beta2=0.9) else: raise NotImplementedError() # Optimizer internal state reset ops reset_G_opt_op = tf.variables_initializer(G_opt.variables()) reset_D_opt_op = tf.variables_initializer(D_opt.variables()) # Create training ops G_train_op = G_opt.minimize( G_loss, var_list=G_vars, global_step=tf.train.get_or_create_global_step()) D_train_op = D_opt.minimize(D_loss, var_list=D_vars) def smoothstep(x, mi, mx): return mi + (mx - mi) * (lambda t: np.where( t < 0, 0, np.where(t <= 1, 3 * t**2 - 2 * t**3, 1)))(x) def np_lerp_clip(t, a, b): return a + (b - a) * np.clip(t, 0.0, 1.0) def get_lod_at_step(step): return np.piecewise(float(step), [ step < 10000, 10000 <= step < 20000, 20000 <= step < 30000, 30000 <= step < 40000, 40000 <= step < 50000, 50000 <= step < 60000, 60000 <= step < 70000, 70000 <= step < 80000, 80000 <= step < 90000, 90000 <= step < 100000 ], [ 0, lambda x: np_lerp_clip((x - 10000) / 10000, 0, 1), 1, lambda x: np_lerp_clip( (x - 30000) / 10000, 1, 2), 2, lambda x: np_lerp_clip( (x - 50000) / 10000, 2, 3), 3, lambda x: np_lerp_clip( (x - 70000) / 10000, 3, 4), 4, lambda x: np_lerp_clip( (x - 90000) / 10000, 4, 5), 5 ]) def my_filter_callable(datum, tensor): if (not isinstance(tensor, debug_data.InconvertibleTensorProto)) and ( tensor.dtype == np.float32 or tensor.dtype == np.float64): return np.any([ np.any(np.greater_equal(tensor, 50.0)), np.any(np.less_equal(tensor, -50.0)) ]) else: return False # Create a LocalCLIDebugHook and use it as a monitor # debug_hook = tf_debug.LocalCLIDebugHook(dump_root='C:/d/t/') # debug_hook.add_tensor_filter('large_values', my_filter_callable) # hooks = [debug_hook] # Run training with tf.train.MonitoredTrainingSession( checkpoint_dir=args.train_dir, save_checkpoint_secs=args.train_save_secs, save_summaries_secs=args.train_summary_secs) as sess: # Get the summary writer for writing extra summary statistics summary_writer = SummaryWriterCache.get(args.train_dir) cur_lod = 0 while True: # Calculate Maximum LOD to train step = sess.run(tf.train.get_or_create_global_step(), feed_dict={lod: cur_lod}) cur_lod = get_lod_at_step(step) prev_lod = get_lod_at_step(step - 1) # Reset optimizer internal state when new layers are introduced if np.floor(cur_lod) != np.floor(prev_lod) or np.ceil( cur_lod) != np.ceil(prev_lod): print( "Resetting optimizers' internal states at step {}".format( step)) sess.run([reset_G_opt_op, reset_D_opt_op], feed_dict={lod: cur_lod}) # Output current LOD and 'steps at currrent LOD' to tensorboard step = float( sess.run(tf.train.get_or_create_global_step(), feed_dict={lod: cur_lod})) lod_summary = tf.Summary(value=[ tf.Summary.Value(tag="current_lod", simple_value=float(cur_lod)), ]) summary_writer.add_summary(lod_summary, step) # Train discriminator for i in xrange(args.wavegan_disc_nupdates): sess.run(D_train_op, feed_dict={lod: cur_lod}) # Enforce Lipschitz constraint for WGAN if D_clip_weights is not None: sess.run(D_clip_weights, feed_dict={lod: cur_lod}) # Train generator sess.run(G_train_op, feed_dict={lod: cur_lod})
def __init__(self, every_n_steps=100, output_dir=None, summary_writer=None):
  self._summary_tag = "global_step/sec"
  self._every_n_steps = every_n_steps
  self._summary_writer = summary_writer
  if summary_writer is None and output_dir:
    self._summary_writer = SummaryWriterCache.get(output_dir)
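# --- Hedged companion sketch (not from the original sources): the write side
# that usually pairs with the constructor above, emitting the "global_step/sec"
# scalar through the cached writer. The function and argument names are
# illustrative assumptions.
import tensorflow as tf


def write_steps_per_sec(summary_writer, tag, elapsed_steps, elapsed_time,
                        global_step):
    """Writes `elapsed_steps / elapsed_time` under `tag` at `global_step`."""
    steps_per_sec = elapsed_steps / float(elapsed_time)
    summary = tf.Summary(
        value=[tf.Summary.Value(tag=tag, simple_value=steps_per_sec)])
    summary_writer.add_summary(summary, global_step)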
def exp(dir_prefix, tf_log_dir="ckpt", our_log_dir="logging", replay_cache_dir="ReplayBufferCache", gpu_mem_fraction=0.15, save_checkpoint_secs=3600): n_skip = 6 n_stack = 3 if_random_phase = True # === Agent # --- agent basic ALL_ACTIONS = [(ord(mode), ) for mode in ['s', 'd', 'a']] + [(0, )] AGENT_ACTIONS = ALL_ACTIONS[:3] num_actions = len(AGENT_ACTIONS) noop = 3 gamma = 0.9 greedy_epsilon = CappedLinear(int(3e4), 0.2, 0.05) # --- replay buffer # replay_upsample_bias = (1, 1, 1, 0.1) # --- NN architecture f_net = lambda inputs: f_dueling_q(inputs, num_actions) if_ddqn = True # --- optimization batch_size = 8 learning_rate = 1e-4 target_sync_interval = 1 target_sync_rate = 1e-3 update_interval = 1 max_grad_norm = 1.0 sample_mimimum_count = 100 update_ratio = 8.0 # --- logging and ckpt replay_capacity = 300 replay_ratio_active = 1.0 # === Reward function class FuncReward(object): def __init__(self, gamma): self.__gamma = gamma self._ema_speed = 10.0 self._ema_dist = 0.0 self._obs_risk = 0.0 self._road_change = False self._mom_opp = 0.0 self._mom_biking = 0.0 self._steering = False self._waiting_steps = 0 def reset(self): self._ema_speed = 10.0 self._ema_dist = 0.0 self._obs_risk = 0.0 self._road_change = False self._mom_opp = 0.0 self._mom_biking = 0.0 self._steering = False def _func_scalar_reward(self, rewards, action): """Coverts a vector reward into a scalar.""" info = {} # append a reward that is 1 when action is lane switching rewards = rewards.tolist() print(' ' * 3 + 'R: [' + '{:4.2f} ' * len(rewards) + ']').format(*rewards), # extract relevant rewards. speed = rewards[0] dist = rewards[1] obs_risk = rewards[2] # road_invalid = rewards[3] > 0.01 # any yellow or red road_change = rewards[4] > 0.01 # entering intersection opp = rewards[5] biking = rewards[6] # inner = rewards[7] # outter = rewards[8] steer = np.logical_or(action == 1, action == 2) if speed < 0.1: self._waiting_steps += 1 else: self._waiting_steps = 0 # update reward-related state vars ema_speed = 0.5 * self._ema_speed + 0.5 * speed ema_dist = 1.0 if dist > 2.0 else 0.9 * self._ema_dist mom_opp = min((opp < 0.5) * (self._mom_opp + 1), 20) mom_biking = min((biking > 0.5) * (self._mom_biking + 1), 12) steering = steer if action != 3 else self._steering self._ema_speed = ema_speed self._ema_dist = ema_dist self._obs_risk = obs_risk self._road_change = road_change self._mom_opp = mom_opp self._mom_biking = mom_biking self._steering = steering print '{:3.0f}, {:3.0f}, {:4.2f}, {:3.0f}'.format( mom_opp, mom_biking, ema_dist, self._steering), info['reward_fun/speed'] = speed info['reward_fun/dist2longest'] = dist info['reward_fun/obs_risk'] = obs_risk info['reward_fun/road_change'] = road_change info['reward_fun/on_opposite'] = opp info['reward_fun/on_biking'] = biking info['reward_fun/steer'] = steer info['reward_fun/mom_opposite'] = mom_opp info['reward_fun/mom_biking'] = mom_biking info['waiting_steps'] = self._waiting_steps # calculate scalar reward reward = [ # velocity speed * 10 - 10, # obs factor -100.0 * obs_risk, # opposite -20 * (0.9 + 0.1 * mom_opp) * (mom_opp > 1.0), # ped -40 * (0.9 + 0.1 * mom_biking) * (mom_biking > 1.0), # steer steering * -40.0, ] reward = np.sum(reward) / 100.0 print ': {:5.2f}'.format(reward) return reward, info def _func_early_stopping(self): """Several early stopping criterion.""" info = {} done = False # switched lane while going into intersection. if self._road_change and self._ema_dist > 0.2: print "[Episode early stopping] turned into intersection." 
done = True info['banned_road_change'] = True # used biking lane to cross intersection if self._road_change and self._mom_biking > 0: print "[Episode early stopping] entered intersection on biking lane." done = True info['banned_road_change'] = True # hit obstacle if self._obs_risk > 1.0: print "[Episode early stopping] hit obstacle." done = True # waiting too long if self._waiting_steps > 80: print "[Episode early stopping] waiting too long" done = True return done, info def _func_skipping_bias(self, reward, done, info, n_skip=1, cnt_skip=0): new_info = {} if 'banned_road_change' in info: reward -= 1.0 * (n_skip - cnt_skip) if done: pass # reward /= (1 - self.__gamma) / (n_skip - cnt_skip) new_info['reward_fun/reward'] = reward return reward, new_info def __call__(self, action, rewards, done, n_skip=1, cnt_skip=0): info = {} reward, info_diff = self._func_scalar_reward(rewards, action) info.update(info_diff) early_done, info_diff = self._func_early_stopping() done = done | early_done info.update(info_diff) reward, info_diff = self._func_skipping_bias( reward, done, info, n_skip, cnt_skip) info.update(info_diff) if done: info['flag_success'] = reward > 0.0 self.reset() return reward, done, info # ========================================== # ========================================== # ========================================== env, replay_buffer, _agent = None, None, None try: # Parse flags tf_log_dir = os.sep.join([dir_prefix, tf_log_dir]) our_log_dir = os.sep.join([dir_prefix, our_log_dir]) replay_cache_dir = os.sep.join([dir_prefix, replay_cache_dir]) summary_writer = SummaryWriterCache.get(tf_log_dir) # global_step = tf.get_variable( # 'global_step', [], dtype=tf.int32, # initializer=tf.constant_initializer(0), trainable=False) # Environment def gen_default_backend_cmds(): ws_path = '/Projects/catkin_ws/' initialD_path = '/Projects/hobotrl/playground/initialD/' backend_path = initialD_path + 'ros_environments/backend_scripts/' utils_path = initialD_path + 'ros_environments/backend_scripts/utils/' backend_cmds = [ ['python', utils_path + '/iterate_test_case.py'], # Parse maps [ 'python', utils_path + 'parse_map.py', ws_path + 'src/Map/src/map_api/data/honda_wider.xodr', utils_path + 'road_segment_info.txt' ], # Start roscore ['roscore'], # Reward function script ['python', backend_path + 'gazebo_rl_reward.py'], # Road validity node script [ 'python', backend_path + 'road_validity.py', utils_path + 'road_segment_info.txt.signal' ], # Simulation restarter backend ['python', backend_path + 'rviz_restart.py', 'next.launch'], # Video capture ['python', backend_path + 'car_go.py', '--use-dummy-action'] ] return backend_cmds env = DrSimRuleDecisionK8S( image_uri='docker.hobot.cc/carsim/simulator_gpu_kub:0.0.10_384.111', backend_cmds=gen_default_backend_cmds(), is_dummy_action=True) # Agent state_shape = env.observation_space.shape # Utilities stepsSaver = StepsSaver(our_log_dir) reward_vector2scalar = FuncReward(gamma) # Configure sess n_ep = 0 n_total_steps = 0 # GoGoGo while n_total_steps <= 2.5e5: cum_reward = 0.0 n_ep_steps = 0 state_action = env.reset() state, action = state_action print "action: ", action # print "state shape: {}".format(state.shape) while True: next_state_action, vec_reward, done, env_info = env.step(3) next_state, next_action = next_state_action reward, done, reward_info = reward_vector2scalar( action, vec_reward, done) env_info.update(reward_info) summary_proto = log_info( {}, env_info, done, cum_reward, n_ep, n_ep_steps, n_total_steps, ) 
summary_writer.add_summary(summary_proto, n_total_steps) n_total_steps += 1 n_ep_steps += 1 cum_reward += reward flag_success = reward_info['flag_success'] \ if 'flag_success' in reward_info else False stepsSaver.save(n_ep, n_ep_steps, state, action, vec_reward, reward, done, cum_reward, flag_success) state, action = next_state, next_action print "action: ", action if done: n_ep += 1 logging.warning( "Episode {} finished in {} steps, reward is {}.". format( n_ep, n_ep_steps, cum_reward, )) break if n_ep >= 100: break except Exception as e: print e.message traceback.print_exc() finally: logging.warning("=" * 30) logging.warning("=" * 30) logging.warning("Tidying up...") # kill orphaned monitor daemon process if env is not None: env.env.exit() replay_buffer.close() if replay_buffer is not None: replay_buffer.close() if _agent is not None: _agent.stop() # os.killpg(os.getpgid(os.getpid()), signal.SIGKILL) import time logging.warning("waiting for k8s end") time.sleep(180) logging.warning("=" * 30)
# Loss
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                      labels=labels_t)

# Optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=config['lr'])
global_step_t = tf.train.create_global_step()
train_op = optimizer.minimize(loss, global_step=global_step_t)

# Session
sess = tf.train.MonitoredTrainingSession(checkpoint_dir=logdir,
                                         save_checkpoint_secs=60,
                                         save_summaries_steps=50)

# Summaries
summary_writer = SummaryWriterCache.get(logdir)
metrics_logger = experiment_logging.TensorboardLogger(writer=summary_writer)
shutil.copyfile(config_path, logdir + '/config.py')  # save config in logdir

# Fetch entire dev set (no need to do this inside the eval loop repeatedly)
image_dev, question_dev, label_dev = next(batcher_dev)
dev_feed_dict = {
    images_t: image_dev,
    questions_t: question_dev,
    labels_t: label_dev
}

# Train-Eval loop
while True:
    image, question, label = next(batcher_train)

    current_step, train_loss, _ = sess.run([global_step_t, loss, train_op],
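# --- Hedged sketch (not from the original sources): a writer fetched via
# SummaryWriterCache.get, as above, is typically used to log plain Python values
# that never pass through the graph (e.g. dev-set metrics). The helper name and
# tags are illustrative assumptions.
import tensorflow as tf


def log_scalar(writer, tag, value, step):
    """Writes a single scalar summary for a plain Python number."""
    writer.add_summary(
        tf.Summary(value=[tf.Summary.Value(tag=tag,
                                           simple_value=float(value))]),
        step)

# e.g. inside the train-eval loop above:
#   log_scalar(summary_writer, "dev/f1", dev_f1, current_step)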
def train(): """Running the main training loop with given parameters.""" if FLAGS.task == 0 and not tf.gfile.Exists(FLAGS.output_dir): tf.gfile.MakeDirs(FLAGS.output_dir) # Read train/dev/test graphs, create datasets and model add_inverse_edge = FLAGS.model in \ ["source_rel_attention", "source_path_attention"] train_graph, train_data = read_graph_data( kg_file=FLAGS.kg_file, add_reverse_graph=not add_inverse_edge, add_inverse_edge=add_inverse_edge, mode="train", num_epochs=FLAGS.num_epochs, batchsize=FLAGS.batchsize, max_neighbors=FLAGS.max_neighbors, max_negatives=FLAGS.max_negatives, text_kg_file=FLAGS.text_kg_file ) worker_device = "/job:{}".format(FLAGS.brain_job_name) with tf.device( tf.train.replica_device_setter( FLAGS.ps_tasks, worker_device=worker_device)): iterator = train_data.dataset.make_one_shot_iterator() candidate_scores, _, labels, model, is_train_ph, _ = create_model( train_graph, iterator ) # Create train loss and training op loss = losses.softmax_crossentropy(logits=candidate_scores, labels=labels) optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) global_step = tf.Variable(0, name="global_step", trainable=False) train_op = get_train_op(loss, optimizer, FLAGS.grad_clip, global_step=global_step) tf.summary.scalar("Loss", loss) run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True) session_config = tf.ConfigProto(log_device_placement=True) # Create tf training session scaffold = tf.train.Scaffold(saver=tf.train.Saver(max_to_keep=1000)) # ckpt_hook = tf.train.CheckpointSaverHook( # checkpoint_dir=FLAGS.output_dir, scaffold=scaffold, # save_steps=FLAGS.save_every # ) # summary_hook = tf.train.SummarySaverHook( # save_secs=60, output_dir=FLAGS.output_dir, # summary_op=tf.summary.merge_all() # ) session = tf.train.MonitoredTrainingSession( master=FLAGS.master, is_chief=(FLAGS.task == 0), checkpoint_dir=FLAGS.output_dir, save_checkpoint_steps=FLAGS.save_every, scaffold=scaffold, save_summaries_secs=60, # hooks=[summary_hook], # chief_only_hooks=[ckpt_hook], config=session_config ) # Create embeddings visualization if FLAGS.task == 0: utils.save_embedding_vocabs(FLAGS.output_dir, train_graph, FLAGS.entity_names_file) pconfig = projector.ProjectorConfig() add_embedding_to_projector( pconfig, model["entity_encoder"].embeddings.name.split(":")[0], os.path.join(FLAGS.output_dir, "entity_vocab.tsv") ) add_embedding_to_projector( pconfig, model["relation_encoder"].embeddings.name.split(":")[0], os.path.join(FLAGS.output_dir, "relation_vocab.tsv") ) if FLAGS.text_kg_file: word_embeddings = model["text_encoder"].word_embedding_encoder.embeddings add_embedding_to_projector( pconfig, word_embeddings.name.split(":")[0], os.path.join(FLAGS.output_dir, "word_vocab.tsv") ) projector.visualize_embeddings( SummaryWriterCache.get(FLAGS.output_dir), pconfig ) # Main training loop running_total_loss = 0. nsteps = 0 gc.collect() while True: try: current_loss, _, _ = session.run( [loss, train_op, global_step], # feed_dict={is_train_ph: True, handle: train_iterator_handle}, feed_dict={is_train_ph: True}, options=run_options ) nsteps += 1 running_total_loss += current_loss tf.logging.info("Step %d, loss: %.3f, running avg loss: %.3f", nsteps, current_loss, running_total_loss / nsteps) if nsteps %2 == 0: gc.collect() except tf.errors.OutOfRangeError: tf.logging.info("End of Traning Epochs after %d steps", nsteps) break
def exp(dir_prefix, tf_log_dir="ckpt", our_log_dir="logging", replay_cache_dir="ReplayBufferCache", gpu_mem_fraction=0.15, save_checkpoint_secs=3600): n_skip = 6 n_stack = 3 if_random_phase = True # === Agent # --- agent basic ALL_ACTIONS = [(ord(mode), ) for mode in ['s', 'd', 'a']] + [(0, )] AGENT_ACTIONS = ALL_ACTIONS[:3] num_actions = len(AGENT_ACTIONS) noop = 3 gamma = 0.9 ckpt_step = 0 greedy_epsilon = CappedLinear( int(3e4) - ckpt_step, 0.2 - (0.15 / 3e4 * ckpt_step), 0.05) start_step = ckpt_step * 6 # --- replay buffer replay_bucket_size = 100 replay_max_sample_epoch = 2 # replay_upsample_bias = (1, 1, 1, 0.1) # --- NN architecture f_net = lambda inputs: f_dueling_q(inputs, num_actions) if_ddqn = True # --- optimization batch_size = 8 learning_rate = 1e-4 target_sync_interval = 1 target_sync_rate = 1e-3 update_interval = 1 max_grad_norm = 1.0 sample_mimimum_count = 100 update_ratio = 8.0 # --- logging and ckpt replay_capacity = 300 replay_ratio_active = 1.0 # === Reward function class FuncReward(object): def __init__(self, gamma): self.__gamma = gamma self._ema_speed = 10.0 self._ema_dist = 0.0 self._obs_risk = 0.0 self._road_change = False self._mom_opp = 0.0 self._mom_biking = 0.0 self._steering = False self._waiting_steps = 0 def reset(self): self._ema_speed = 10.0 self._ema_dist = 0.0 self._obs_risk = 0.0 self._road_change = False self._mom_opp = 0.0 self._mom_biking = 0.0 self._steering = False def _func_scalar_reward(self, rewards, action): """Coverts a vector reward into a scalar.""" info = {} # append a reward that is 1 when action is lane switching rewards = rewards.tolist() print(' ' * 3 + 'R: [' + '{:4.2f} ' * len(rewards) + ']').format(*rewards), # extract relevant rewards. speed = rewards[0] dist = rewards[1] obs_risk = rewards[2] # road_invalid = rewards[3] > 0.01 # any yellow or red road_change = rewards[4] > 0.01 # entering intersection opp = rewards[5] biking = rewards[6] # inner = rewards[7] # outter = rewards[8] steer = np.logical_or(action == 1, action == 2) if speed < 0.1: self._waiting_steps += 1 else: self._waiting_steps = 0 # update reward-related state vars ema_speed = 0.5 * self._ema_speed + 0.5 * speed ema_dist = 1.0 if dist > 2.0 else 0.9 * self._ema_dist mom_opp = min((opp < 0.5) * (self._mom_opp + 1), 1) mom_biking = min((biking > 0.5) * (self._mom_biking + 1), 1) steering = steer if action != 3 else self._steering self._ema_speed = ema_speed self._ema_dist = ema_dist self._obs_risk = obs_risk self._road_change = road_change self._mom_opp = mom_opp self._mom_biking = mom_biking self._steering = steering print '{:3.0f}, {:3.0f}, {:4.2f}, {:3.0f}'.format( mom_opp, mom_biking, ema_dist, self._steering), info['reward_fun/speed'] = speed info['reward_fun/dist2longest'] = dist info['reward_fun/obs_risk'] = obs_risk info['reward_fun/road_change'] = road_change info['reward_fun/on_opposite'] = opp info['reward_fun/on_biking'] = biking info['reward_fun/steer'] = steer info['reward_fun/mom_opposite'] = mom_opp info['reward_fun/mom_biking'] = mom_biking info['waiting_steps'] = self._waiting_steps # calculate scalar reward reward = [ # velocity speed * 10 - 10, # obs factor -100.0 * obs_risk, # opposite -10 * (0.9 + 0.1 * mom_opp) * (mom_opp > 0.99), # ped -10 * (0.9 + 0.1 * mom_biking) * (mom_biking > 0.99), # steer steering * -40.0, ] reward = np.sum(reward) / 100.0 print ': {:5.2f}'.format(reward) return reward, info def _func_early_stopping(self): """Several early stopping criterion.""" info = {} done = False # switched lane while going into intersection. 
if self._road_change and self._ema_dist > 0.2: print "[Episode early stopping] turned into intersection." done = True info['banned_road_change'] = True # used biking lane to cross intersection if self._road_change and self._mom_biking > 0: print "[Episode early stopping] entered intersection on biking lane." done = True info['banned_road_change'] = True # hit obstacle if self._obs_risk > 1.0: print "[Episode early stopping] hit obstacle." done = True # waiting too long if self._waiting_steps > 80: print "[Episode early stopping] waiting too long" done = True return done, info def _func_skipping_bias(self, reward, done, info, n_skip, cnt_skip): new_info = {} if 'banned_road_change' in info: reward -= 1.0 * (n_skip - cnt_skip) if done: pass # reward /= (1 - self.__gamma) / (n_skip - cnt_skip) new_info['reward_fun/reward'] = reward return reward, new_info def __call__(self, action, rewards, done, n_skip=1, cnt_skip=0): info = {} reward, info_diff = self._func_scalar_reward(rewards, action) info.update(info_diff) early_done, info_diff = self._func_early_stopping() done = done | early_done info.update(info_diff) reward, info_diff = self._func_skipping_bias( reward, done, info, n_skip, cnt_skip) info.update(info_diff) if done: info['flag_success'] = reward > 0.0 self.reset() return reward, done, info # ========================================== # ========================================== # ========================================== env, replay_buffer, _agent = None, None, None try: # Parse flags # FLAGS = tf.app.flags.FLAGS tf_log_dir = os.sep.join([dir_prefix, tf_log_dir]) our_log_dir = os.sep.join([dir_prefix, our_log_dir]) replay_cache_dir = os.sep.join([dir_prefix, replay_cache_dir]) # Modify tf graph graph = tf.get_default_graph() # -- create learning rate var and optimizer lr = tf.get_variable('learning_rate', [], dtype=tf.float32, initializer=tf.constant_initializer(1e-3), trainable=False) lr_in = tf.placeholder(dtype=tf.float32) op_set_lr = tf.assign(lr, lr_in) optimizer_td = tf.train.AdamOptimizer(learning_rate=lr) # -- create global step variable global_step = tf.get_variable('global_step', [], dtype=tf.int32, initializer=tf.constant_initializer(0), trainable=False) def gen_default_backend_cmds(): ws_path = '/Projects/catkin_ws/' initialD_path = '/Projects/hobotrl/playground/initialD/' backend_path = initialD_path + 'ros_environments/backend_scripts/' utils_path = initialD_path + 'ros_environments/backend_scripts/utils/' backend_cmds = [ ['python', utils_path + '/iterate_test_case.py'], # Parse maps [ 'python', utils_path + 'parse_map.py', ws_path + 'src/Map/src/map_api/data/honda_wider.xodr', utils_path + 'road_segment_info.txt' ], # Start roscore ['roscore'], # Reward function script ['python', backend_path + 'gazebo_rl_reward.py'], # Road validity node script [ 'python', backend_path + 'road_validity.py', utils_path + 'road_segment_info.txt.signal' ], # Simulation restarter backend ['python', backend_path + 'rviz_restart.py', 'next.launch'], ] return backend_cmds # Environment env = FrameStack( DrSimDecisionK8S(backend_cmds=gen_default_backend_cmds()), n_stack) # Agent replay_buffer = BigPlayback( bucket_cls=MapPlayback, cache_path=replay_cache_dir, capacity=replay_capacity, bucket_size=replay_bucket_size, ratio_active=replay_ratio_active, max_sample_epoch=replay_max_sample_epoch, ) state_shape = env.observation_space.shape __agent = DQN( f_create_q=f_net, state_shape=state_shape, # OneStepTD arguments num_actions=num_actions, discount_factor=gamma, ddqn=if_ddqn, # target network 
sync arguments target_sync_interval=target_sync_interval, target_sync_rate=target_sync_rate, # epsilon greedy arguments greedy_epsilon=greedy_epsilon, # optimizer arguments network_optimizer=LocalOptimizer(optimizer_td, max_grad_norm), # sampler arguments sampler=TransitionSampler(replay_buffer, batch_size=batch_size, interval=update_interval, minimum_count=sample_mimimum_count), # checkpoint global_step=global_step) # Utilities stepsSaver = StepsSaver(our_log_dir) reward_vector2scalar = FuncReward(gamma) # Configure sess config = tf.ConfigProto() config.gpu_options.per_process_gpu_memory_fraction = gpu_mem_fraction with __agent.create_session( config=config, save_dir=tf_log_dir, save_checkpoint_secs=save_checkpoint_secs) as sess, \ AsynchronousAgent( agent=__agent, method='ratio', ratio=update_ratio) as _agent: agent = SkippingAgent( # n_skip_vec=(2, 6, 6), agent=_agent, n_skip=n_skip, specific_act=noop) summary_writer = SummaryWriterCache.get(tf_log_dir) # set vars sess.run(op_set_lr, feed_dict={lr_in: learning_rate}) print "Using learning rate {}".format(sess.run(lr)) n_ep = 0 n_total_steps = start_step # GoGoGo while n_total_steps <= 2.5e5: cum_reward = 0.0 n_ep_steps = 0 state = env.reset() while True: action = agent.act(state, exploration=False) if action != 3: print_qvals(n_ep_steps, __agent, state, action, AGENT_ACTIONS) next_state, vec_reward, done, env_info = env.step(action) reward, done, reward_info = reward_vector2scalar( action, vec_reward, done, agent.n_skip, agent.cnt_skip) agent_info = agent.step(sess=sess, state=state, action=action, reward=reward, next_state=next_state, episode_done=done, learning_off=True) env_info.update(reward_info) summary_proto = log_info( agent_info, env_info, done, cum_reward, n_ep, n_ep_steps, n_total_steps, ) summary_writer.add_summary(summary_proto, n_total_steps) n_total_steps += 1 n_ep_steps += 1 cum_reward += reward flag_success = reward_info['flag_success'] \ if 'flag_success' in reward_info else False stepsSaver.save(n_ep, n_total_steps, state, action, vec_reward, reward, done, cum_reward, flag_success) state = next_state if done: n_ep += 1 logging.warning( "Episode {} finished in {} steps, reward is {}.". format( n_ep, n_ep_steps, cum_reward, )) break if n_ep >= 100: break except Exception as e: print e.message traceback.print_exc() finally: logging.warning("=" * 30) logging.warning("=" * 30) logging.warning("Tidying up...") # kill orphaned monitor daemon process if env is not None: env.env.exit() replay_buffer.close() if replay_buffer is not None: replay_buffer.close() if _agent is not None: _agent.stop() # os.killpg(os.getpgid(os.getpid()), signal.SIGKILL) import time logging.warning("waiting for k8s end") time.sleep(180) logging.warning("=" * 30)
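# The epsilon-greedy schedule above comes from hobotrl's CappedLinear; as a
# hypothetical stand-in (an assumption about its behaviour, not the library's
# actual implementation), a capped linear decay from a start value to an end
# value over n steps, then held constant, can be sketched as:
class CappedLinearSketch(object):
    def __init__(self, n, start, end):
        self._n = float(n)
        self._start = float(start)
        self._end = float(end)

    def __call__(self, step):
        frac = min(max(step / self._n, 0.0), 1.0)  # clamp progress to [0, 1]
        return self._start + frac * (self._end - self._start)

# e.g. CappedLinearSketch(30000, 0.2, 0.05)(0)     -> 0.2
#      CappedLinearSketch(30000, 0.2, 0.05)(30000) -> 0.05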
def run_experiment( random, model_fn, dataset_fn, optimizer, train_batch_size, dev_batch_size, max_context_len, max_answer_len, max_question_len, eval_every_steps, dropout, logdir, **unused ): basic_metrics = { 'f1_score': f1_score, 'precision_score': precision_score, 'recall_score': recall_score, } train_data, dev_data, misc = dataset_fn() assert len(train_data) > 0 and len(dev_data) > 0 word2id = misc['word2id'] embedding_matrix = misc['embedding_matrix'] all_training_data = np.array( sum([dataset for dataset in train_data.values()], []) ) # all_dev_data = np.array( for small dev batches # sum([dataset for dataset in dev_data.values()], []) # ) # Graph inputs context_t = tf.placeholder(tf.int32, [None, max_context_len], name='context_t') context_t_length = tf.placeholder(tf.int32, [None], name='context_t_length') question_t = tf.placeholder(tf.int32, [None, max_question_len], name='question_t') question_t_length = tf.placeholder(tf.int32, [None], name='question_t_length') span2position = data_ops.make_span2position( seq_size=max_context_len, max_len=max_answer_len ) span_mask_t = tf.placeholder(tf.int32, [None, len(span2position)], name='span_mask_t') label_t = tf.placeholder(tf.float32, [None, len(span2position)], name='label_t') is_training = tf.placeholder(tf.bool, name='is_training_flag') position2span = {v: k for k, v in span2position.items()} id2word = {v: k for k, v in word2id.items()} # Model outputs logits_t, *the_rest = model_fn( context_t, context_t_length, question_t, question_t_length, span2position, embedding_matrix, span_mask_t, is_training ) # Build a mask which masks out-of-bound spans span_mask = tf.cast(span_mask_t, tf.float32) # Mask the logits of spans which shouldn't be considered logits_t *= span_mask logit_min = tf.reduce_min(logits_t, axis=1, keepdims=True) logits_t -= logit_min logits_t *= span_mask # Find the indexes of the predicted spans y_preds = tf.argmax(logits_t, axis=1) # For numerical stability reasons subtract the max logit_max = tf.reduce_max(logits_t, axis=1, keepdims=True) logits_t -= logit_max logits_t *= span_mask # Negative log likelihood (i.e. 
multiclass cross-entropy) loss exp_logits_t = tf.exp(logits_t) * span_mask log_sum_exp_logits_t = tf.log(tf.reduce_sum(exp_logits_t, axis=1) + 1e-7) gather_mask = tf.one_hot(y_preds, depth=logits_t.get_shape()[-1], dtype=tf.bool, on_value=True, off_value=False) y_logits = tf.boolean_mask(logits_t, gather_mask) xents = log_sum_exp_logits_t - y_logits loss_t = tf.reduce_mean(xents) prediction_probs_t = exp_logits_t / tf.expand_dims(tf.reduce_sum(exp_logits_t, axis=1), 1) # Optimizer global_step_t = tf.train.create_global_step() train_op = optimizer.minimize(loss_t, global_step=global_step_t) # Session sess = tf.train.MonitoredTrainingSession( checkpoint_dir=logdir, save_checkpoint_secs=60000, save_summaries_steps=50 ) # Summaries summary_writer = SummaryWriterCache.get(logdir) metrics_logger = experiment_logging.TensorboardLogger(writer=summary_writer) shutil.copyfile(config_path, logdir + '/config.py') # save config in logdir # Fetch entire dev set (no need to do this inside the eval loop repeatedly) dev_feed_dicts = { # One feed dict for each dataset dataset_name: { context_t: np.asarray([x['context'] for x in dataset]), context_t_length: np.asarray([x['context_len'] for x in dataset]), question_t: np.asarray([x['question'] for x in dataset]), question_t_length: np.asarray([x['question_len'] for x in dataset]), label_t: np.asarray([x['label'] for x in dataset]), span_mask_t: np.asarray([x['span_mask'] for x in dataset]), } for dataset_name, dataset in dev_data.items() } # Train-Eval loop epoch_indices = np.random.permutation(np.arange(len(all_training_data))) while True: train_indices = epoch_indices[:train_batch_size] if len(epoch_indices) < train_batch_size: epoch_indices = np.random.permutation(np.arange(len(all_training_data))) train_batch = all_training_data[train_indices] train_feed_dict = { context_t: np.asarray([x['context'] for x in train_batch]), context_t_length: np.asarray([x['context_len'] for x in train_batch]), question_t: np.asarray([x['question'] for x in train_batch]), question_t_length: np.asarray([x['question_len'] for x in train_batch]), label_t: np.asarray([x['label'] for x in train_batch]), span_mask_t: np.asarray([x['span_mask'] for x in train_batch]), is_training: False, # 'out_dropout:0': dropout, } current_step, train_loss, _xents, _logits_t, _exp_logits_t, _log_sum_exp_logits_t, *_the_rest = sess.run( [global_step_t, loss_t, xents, logits_t, exp_logits_t, log_sum_exp_logits_t] + the_rest + [train_op], feed_dict=train_feed_dict ) if eval_every_steps is not None and current_step % eval_every_steps == 0: beginning_of_eval_time = time.time() logging.info('<large eval>:dev') # batch eval each dataset outputs_for_each_dataset = {} for dataset_name, dataset_feed_dict in dev_feed_dicts.items(): logging.info(f'Computing dev outputs for {dataset_name}') batched_feed_dicts = [ { placeholder: eval_data[i: i+dev_batch_size] for placeholder, eval_data in dataset_feed_dict.items() } for i in range(0, len(dev_data[dataset_name]), dev_batch_size) ] for d in batched_feed_dicts: d.update({is_training: False}) dataset_model_output = None batched_model_outputs = [ sess.run( { 'prediction_probs_t': prediction_probs_t, 'label_t': label_t, 'loss_per_example_t': xents }, feed_dict=batch_feed_dict ) for batch_feed_dict in tqdm(batched_feed_dicts) ] dataset_model_output = { tensor_name: np.concatenate([output[tensor_name] for output in batched_model_outputs]) for tensor_name in batched_model_outputs[0].keys() } outputs_for_each_dataset[dataset_name] = dataset_model_output # much nicer, 
non batched version of evaluating # outputs_for_each_dataset = { # dataset_name: sess.run( # { # 'prediction_probs_t': prediction_probs_t, # 'label_t': label_t, # 'loss_per_example_t': loss_per_example_t # }, # feed_dict=dataset_feed_dict # ) for dataset_name, dataset_feed_dict in dev_feed_dicts.items() # } # build a combined dataset output_names = outputs_for_each_dataset[list(outputs_for_each_dataset.keys())[0]].keys() # HACK all_dev_outputs = { output_name: np.concatenate([ outputs_for_each_dataset[dataset_name][output_name] for dataset_name in outputs_for_each_dataset ]) for output_name in output_names } outputs_for_each_dataset['combined'] = all_dev_outputs for dataset_name, dev_model_outputs in outputs_for_each_dataset.items(): metrics_logger.log_scalar( f'loss/{dataset_name}', dev_model_outputs['loss_per_example_t'].mean(), current_step ) dev_probs = dev_model_outputs['prediction_probs_t'] dev_labels = dev_model_outputs['label_t'] # predicted_labels = (dev_probs > 0.5).astype(int) predicted_labels = (dev_probs.max(axis=1, keepdims=1) == dev_probs).astype(int) for metric_name, metric_fn in basic_metrics.items(): score = metric_fn( y_true=np.ndarray.flatten(dev_labels), y_pred=np.ndarray.flatten(predicted_labels), average=None ) for i, val in enumerate(score): metrics_logger.log_scalar( f'{metric_name}/{dataset_name}/label_{i}', val, current_step ) acc = accuracy_score( y_true=np.ndarray.flatten(dev_labels), y_pred=np.ndarray.flatten(predicted_labels), ) metrics_logger.log_scalar( f'accuracy/{dataset_name}', acc, current_step ) if dataset_name == 'combined': # only want per-dataset examples continue context_dev = [x['context_raw'] for x in dev_data[dataset_name]] question_dev = [x['question_raw'] for x in dev_data[dataset_name]] np.all((dev_labels == predicted_labels), axis=1) to_pick_correct = experiment_logging.select_n_classified( ground_truth=dev_labels, predicted=predicted_labels, correct=True, n=2 ) to_pick_wrong = experiment_logging.select_n_classified( ground_truth=dev_labels, predicted=predicted_labels, correct=False, n=2 ) prob_dist = np.argmax(dev_probs, axis=1) span_counts = Counter(prob_dist) sorted_span_counts = sorted(span_counts.items()) span_pos_counts = dict([(position2span[x[0]], x[1]) for x in sorted_span_counts]) print('DEV predicted span distribution') print(span_pos_counts) # TODO: repeated code, move to methods? + the following code cannot handle cases where some spans are # correct and others aren't (it will just show them as being all wrong). if to_pick_correct: correct_spans = [ [position2span[i] for i, x in enumerate(predicted_labels[p]) if x == 1] for p in to_pick_correct ] correct_contexts = [context_dev[p] for p in to_pick_correct] correct_questions = [question_dev[p] for p in to_pick_correct] for s, c, q in zip(correct_spans, correct_contexts, correct_questions): prompt = ' '.join(q) experiment_logging.print_spans(c, s, prompt) if to_pick_wrong: wrong_spans = [ [position2span[i] for i, x in enumerate(predicted_labels[p]) if x == 1] for p in to_pick_wrong ] wrong_contexts = [context_dev[p] for p in to_pick_wrong] wrong_questions = [question_dev[p] for p in to_pick_wrong] for s, c, q in zip(wrong_spans, wrong_contexts, wrong_questions): prompt = ' '.join(q) experiment_logging.print_spans( tokens=c, spans=s, prompt=prompt, span_color='\x1b[6;30;41m', prompt_color='\33[1m\33[31m' ) logging.info(f'evaluation took {time.time() - beginning_of_eval_time:.2f} seconds')
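# The span scoring above builds a masked softmax by hand inside the graph
# (shift the logits, zero out spans that fall outside the context, then
# normalise). A plain NumPy sketch of the same idea -- independent of the
# graph code and of its exact shifting scheme -- is shown below.
import numpy as np

def masked_softmax(logits, mask):
    """Numerically stable softmax that assigns zero probability to masked spans."""
    logits = np.where(mask > 0, logits, -1e30)            # ignore invalid spans
    logits = logits - logits.max(axis=1, keepdims=True)   # stability shift
    exp = np.exp(logits) * (mask > 0)
    return exp / exp.sum(axis=1, keepdims=True)

# probs = masked_softmax(raw_span_scores, span_mask)  # shapes: [batch, n_spans]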
def work(self): hooks = [self.ppo.sync_replicas_hook] sess = tf.train.MonitoredTrainingSession(master=self.server.target, is_chief=(self.wid == 0), checkpoint_dir=SUMMARY_DIR, save_summaries_steps=None, save_summaries_secs=None, hooks=hooks) if self.wid == 0: writer = SummaryWriterCache.get(SUMMARY_DIR) t, episode, terminal = 0, 0, False buffer_s, buffer_a, buffer_r, buffer_v, buffer_terminal = [], [], [], [], [] rolling_r = RunningStats() while not sess.should_stop() and not (episode > EP_MAX and self.wid == 0): s = self.env.reset() ep_r, ep_t, ep_a = 0, 0, [] while True: a, v = self.ppo.evaluate_state(s, sess) # Update ppo if t == BATCH: # or (terminal and t < BATCH): # Normalise rewards rewards = np.array(buffer_r) rolling_r.update(rewards) rewards = np.clip(rewards / rolling_r.std, -10, 10) v_final = [ v * (1 - terminal) ] # v = 0 if terminal, otherwise use the predicted v values = np.array(buffer_v + v_final) terminals = np.array(buffer_terminal + [terminal]) # Generalized Advantage Estimation - https://arxiv.org/abs/1506.02438 delta = rewards + GAMMA * values[1:] * ( 1 - terminals[1:]) - values[:-1] advantage = discount(delta, GAMMA * LAMBDA, terminals) returns = advantage + np.array(buffer_v) advantage = (advantage - advantage.mean()) / np.maximum( advantage.std(), 1e-6) bs, ba, br, badv = np.reshape(buffer_s, (t,) + self.ppo.s_dim), np.vstack(buffer_a), \ np.vstack(returns), np.vstack(advantage) graph_summary = self.ppo.update(bs, ba, br, badv, sess) buffer_s, buffer_a, buffer_r, buffer_v, buffer_terminal = [], [], [], [], [] t = 0 buffer_s.append(s) buffer_a.append(a) buffer_v.append(v) buffer_terminal.append(terminal) ep_a.append(a) if not self.ppo.discrete: a = np.clip(a, self.env.action_space.low, self.env.action_space.high) s, r, terminal, _ = self.env.step(a) buffer_r.append(r) ep_r += r ep_t += 1 t += 1 if terminal: # End of episode summary print('Worker_%i' % self.wid, '| Episode: %i' % episode, "| Reward: %.2f" % ep_r, '| Steps: %i' % ep_t) if self.wid == 0: worker_summary = tf.Summary() worker_summary.value.add(tag="Reward", simple_value=ep_r) # Create Action histograms for each dimension actions = np.array(ep_a) if self.ppo.discrete: add_histogram(writer, "Action", actions, episode, bins=self.ppo.a_dim) else: for a in range(self.ppo.a_dim): add_histogram(writer, "Action/Dim" + str(a), actions[:, a], episode) try: writer.add_summary(graph_summary, episode) except NameError: pass writer.add_summary(worker_summary, episode) writer.flush() episode += 1 break self.env.close() print("Worker_%i finished" % self.wid)
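# The GAE step above relies on a `discount` helper; a minimal NumPy sketch of
# what such a helper is assumed to compute (a reverse discounted cumulative sum
# that resets at episode boundaries) is given below. The real implementation
# and its exact terminal indexing may differ.
import numpy as np

def discount_sketch(x, gamma, terminals):
    out = np.zeros_like(x, dtype=np.float64)
    running = 0.0
    for t in reversed(range(len(x))):
        running = x[t] + gamma * running * (1.0 - terminals[t])  # reset after terminal steps
        out[t] = running
    return out

# e.g. advantages = discount_sketch(deltas, GAMMA * LAMBDA, dones)
# with `deltas` and `dones` as equal-length 1-D arrays.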
# separate subgraph of model training in tensorboard for better visualization
# apply_gradients advances a global step shared between workers
with tf.name_scope('Model_Training'):
    global_step = tf.train.get_or_create_global_step()
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    grads_and_vars = optimizer.compute_gradients(loss)
    train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

# compute the accuracy (used to plot test_accuracy) and initialize the summary writer for tensorboard
correct_pred = tf.equal(tf.argmax(y, 1), tf.argmax(y_pred, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
tf.summary.scalar('Accuracy', accuracy)
summary_var = tf.summary.merge_all()
writer = SummaryWriterCache.get(log_dir)

# create a monitored training session object
config = tf.ConfigProto(device_filters=['/job:ps', '/job:worker/task:%d' % FLAGS.task_index])
mts = tf.train.MonitoredTrainingSession(master=server.target, is_chief=FLAGS.task_index == 0, config=config)

iterations = 0
with mts as sess:
    # run until the test loss falls below the converging_loss (the loss that is desired)
    while True:
        (data_batch, label_batch) = \
            mnist.train.next_batch(FLAGS.batch_size)
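# The snippet above assumes a tf.train.Server whose target is passed to the
# MonitoredTrainingSession; a minimal sketch of that between-graph replication
# setup is shown below. The host:port addresses and the FLAGS.job_name flag
# are assumptions for illustration, not taken from the original code.
cluster = tf.train.ClusterSpec({
    'ps': ['localhost:2222'],
    'worker': ['localhost:2223', 'localhost:2224'],
})
server = tf.train.Server(cluster,
                         job_name=FLAGS.job_name,
                         task_index=FLAGS.task_index)
if FLAGS.job_name == 'ps':
    server.join()  # parameter servers block here and serve variables to workers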