def get(logdir):
    """Returns the FileWriter for the specified directory.

    Args:
      logdir: str, name of the directory.

    Returns:
      A `FileWriter`.
    """
    with FileWriterCache._lock:
        if logdir not in FileWriterCache._cache:
            FileWriterCache._cache[logdir] = FileWriter(
                logdir, graph=ops.get_default_graph())
        return FileWriterCache._cache[logdir]
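This is the caching pattern behind TensorFlow 1.x's public `tf.summary.FileWriterCache`. A minimal usage sketch, assuming the class above is exposed under that name:

import tensorflow as tf  # TensorFlow 1.x

# Repeated lookups for the same logdir return the same cached writer.
w1 = tf.summary.FileWriterCache.get("logs/demo")
w2 = tf.summary.FileWriterCache.get("logs/demo")
assert w1 is w2

w1.add_session_log(tf.SessionLog(status=tf.SessionLog.START), global_step=0)
tf.summary.FileWriterCache.clear()  # close and drop every cached writer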
def __new__(cls, logdir): """ Creates a singleton instance. Args: logdir: A string, the directory for saving summaries. Returns: The instance. """ assert logdir is not None and logdir != "", \ "need model_dir to initialize SummaryWriter" if SummaryWriter.__instance is None: SummaryWriter.__instance = super(SummaryWriter, cls).__new__(cls) fw = FileWriter(logdir, graph=ops.get_default_graph()) setattr(SummaryWriter.__instance, "_summary_writer", fw) setattr(SummaryWriter.__instance, "add_graph", fw.add_graph) setattr(SummaryWriter.__instance, "add_meta_graph", fw.add_meta_graph) setattr(SummaryWriter.__instance, "add_session_log", fw.add_session_log) return SummaryWriter.__instance
class TensorBoardLogger:
    def __init__(self, path):
        self._writer = FileWriter(path, flush_secs=120)

    def put_start(self, global_step):
        self._writer.add_session_log(
            SessionLog(status=SessionLog.START), global_step)

    def put_scalar(self, k, v, global_step):
        self._writer.add_summary(
            Summary(value=[Summary.Value(tag=k, simple_value=float(v))]),
            global_step)

    def flush(self):
        self._writer.flush()
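A usage sketch for the logger above, assuming the bare `FileWriter`, `Summary`, and `SessionLog` names resolve to their TensorFlow 1.x counterparts (the aliases below make that assumption explicit):

import tensorflow as tf  # TensorFlow 1.x

FileWriter = tf.summary.FileWriter
Summary = tf.Summary
SessionLog = tf.SessionLog

logger = TensorBoardLogger("logs/experiment")
logger.put_start(global_step=0)
for step in range(100):
    loss = 1.0 / (step + 1)  # stand-in for a real training loss
    logger.put_scalar("train/loss", loss, global_step=step)
logger.flush()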
def dump_tensorboard_summary(graph_executor, logdir):
    with FileWriter(logdir) as w:
        pb_graph = visualize(graph_executor)
        evt = event_pb2.Event(wall_time=time.time(),
                              graph_def=pb_graph.SerializeToString())
        w.add_event(evt)
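Instead of `add_graph`, this writes a raw `Event` proto carrying a serialized `GraphDef`. A minimal self-contained sketch of the same technique, using TensorFlow's own default graph in place of the `visualize(graph_executor)` result:

import time
import tensorflow as tf  # TensorFlow 1.x
from tensorflow.core.util import event_pb2

tf.constant(0, name="dummy")  # give the graph at least one node
pb_graph = tf.get_default_graph().as_graph_def()
with tf.summary.FileWriter("logs/graph_dump") as w:
    evt = event_pb2.Event(wall_time=time.time(),
                          graph_def=pb_graph.SerializeToString())
    w.add_event(evt)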
def getSession(self):
    """dummy session"""
    session = FileWriter("run/session.txt")
    return session
def __init__(self, scope, target_network, env, flags):
    """
    This class implements the Critic for the stochastic policy gradient model.
    The critic provides a state-value for the current state of the environment
    in which the agent operates.

    :param scope: within this scope the parameters will be defined
    :param target_network: instance of the Actor (target-network class)
    :param env: instance of the OpenAI environment
    :param flags: TensorFlow flags which contain the values for hyperparameters
    """
    self.TF_FLAGS = flags
    self.env = env

    if scope == 'target':
        with tf.variable_scope(scope):
            self.states = tf.placeholder(
                tf.float32, shape=(None, self.env.get_state_size()),
                name='states')
            self.actions = tf.placeholder(
                tf.float32, shape=(None, self.env.get_action_size()),
                name='actions')
            self.q = self.create_network(scope='q_target_network')
            self.param = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES,
                scope=scope + '/q_target_network')
    else:
        with tf.variable_scope(scope):
            # Add the target network instance
            self.target_network = target_network

            # Create the placeholders for the inputs to the network
            self.states = tf.placeholder(
                tf.float32, shape=(None, self.env.get_state_size()),
                name='states')
            self.actions = tf.placeholder(
                tf.float32, shape=(None, self.env.get_action_size()),
                name='actions')

            # Create the network with the goal of predicting the
            # action-value function
            self.q = self.create_network(scope='q_network')
            self.q_targets = tf.placeholder(
                tf.float32, shape=(None, 1), name='q_targets')

            # The parameters of the network
            self.param = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/q_network')

            with tf.name_scope('q_network_loss'):
                # Difference between the target values and the ones
                # computed by the model
                self.loss = tf.losses.mean_squared_error(
                    self.q_targets, self.q)

            with tf.name_scope('train_q_network'):
                # Optimizer for training the critic network
                self.train_opt = tf.train.AdamOptimizer(
                    self.TF_FLAGS.learning_rate_Critic).minimize(self.loss)

            with tf.name_scope('q_network_gradient'):
                # Compute the gradients used for training the actor model
                self.actor_loss = -tf.math.reduce_mean(self.q)
                self.gradients = tf.gradients(self.actor_loss, self.actions)

            with tf.name_scope('update_q_target'):
                # Soft update of the parameters: local (critic) parameters LP,
                # target network parameters TP:
                #   TP = tau * LP + (1 - tau) * TP
                self.update_opt = [
                    tp.assign(tf.multiply(self.TF_FLAGS.tau, lp) +
                              tf.multiply(1 - self.TF_FLAGS.tau, tp))
                    for tp, lp in zip(self.target_network.param, self.param)
                ]

            with tf.name_scope('initialize_q_target_network'):
                # Set the target network parameters equal to the local ones:
                #   TP = LP
                self.init_target_op = [
                    tp.assign(lp)
                    for tp, lp in zip(self.target_network.param, self.param)
                ]

            FileWriter('logs/train', graph=self.train_opt.graph).close()
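The `update_q_target` block above implements a Polyak (soft) update. A tiny standalone sketch of the same rule, with illustrative variable names, showing that one run moves the target weights a fraction `tau` toward the local ones:

import tensorflow as tf  # TensorFlow 1.x

tau = 0.01
local_w = tf.Variable(tf.ones([2, 2]), name="local_w")
target_w = tf.Variable(tf.zeros([2, 2]), name="target_w")

# Same soft-update rule as update_opt above: TP = tau * LP + (1 - tau) * TP
soft_update = target_w.assign(tau * local_w + (1.0 - tau) * target_w)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(soft_update)
    print(sess.run(target_w))  # every entry is now tau = 0.01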
train_step = tf.train.AdamOptimizer().minimize(
    loss, global_step=tf.train.create_global_step())
tf.summary.scalar('loss', loss)
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.greater(logits, 0), tf.cast(labels, tf.bool)),
            tf.float32))
tf.summary.scalar('accuracy', accuracy)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    th = sess.run(trn_itr.string_handle())
    vh = sess.run(vld_itr.string_handle())
    merged = tf.summary.merge_all()
    trn_writer = FileWriter(os.path.join(model_folder, 'train'), sess.graph)
    vld_writer = FileWriter(os.path.join(model_folder, 'validation'))
    saver = Saver()

    profiler = Profiler(sess.graph)
    opts = (option_builder.ProfileOptionBuilder(
        option_builder.ProfileOptionBuilder
        .trainable_variables_parameter())
        .with_file_output(os.path.join(model_folder, 'profile_model.txt'))
        .build())
    profiler.profile_name_scope(options=opts)

    value_lv = None
    lv = tf.Summary()
    lv.value.add(tag='loss', simple_value=value_lv)
    value_av = None
    av = tf.Summary()
    av.value.add(tag='accuracy', simple_value=value_av)
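The hand-built `lv`/`av` protos above are created with placeholder values; a sketch of how such protos are typically filled in and written later in the training loop. The `step` counter and the concrete metric values are assumptions, standing in for results of `sess.run` on `loss` and `accuracy`:

    # Later, inside the training loop:
    lv.value[0].simple_value = 0.37   # validation loss for this step
    av.value[0].simple_value = 0.91   # validation accuracy for this step
    vld_writer.add_summary(lv, global_step=step)
    vld_writer.add_summary(av, global_step=step)
    vld_writer.flush()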
class TBVecEnvWrapper(VecEnvWrapper):
    def __init__(self, venv, logdir, info_keywords=(), **kwargs):
        """
        A monitor wrapper for vectorized Gym environments; it is used to record
        the episode reward, length, time and other data.

        :param venv: (VecEnv) The vectorized environment
        :param logdir: (str) the location to save tensorboard logs
        :param info_keywords: (tuple) extra information to log, taken from the
            information return of environment.step
        """
        VecEnvWrapper.__init__(self, venv=venv, **kwargs)
        self.writer = FileWriter(logdir)
        self.info_keywords = info_keywords
        self.episode_infos = [dict() for _ in range(self.venv.num_envs)]
        self.total_steps = 0

    def step_wait(self):
        """
        Wait for the step taken with step_async() and log per-episode
        summaries for every finished episode.

        :return: ([int] or [float], [float], [bool], dict) observation,
            reward, done, information
        """
        obs, rews, dones, infos = self.venv.step_wait()
        for i in range(self.venv.num_envs):
            for key in self.info_keywords:
                if key not in infos[i]:
                    break
                if key in self.episode_infos[i]:
                    self.episode_infos[i][key].append(infos[i][key])
                else:
                    self.episode_infos[i][key] = [infos[i][key]]

            if dones[i]:
                # Compute data summaries.
                summary_values = []
                for key, value in self.episode_infos[i].items():
                    mean = np.mean(value)
                    std = np.std(value)
                    minimum = np.min(value)
                    maximum = np.max(value)
                    total = np.sum(value)
                    summary_values.extend([
                        tf.Summary.Value(tag="eval/" + key + "/mean",
                                         simple_value=mean),
                        tf.Summary.Value(tag="eval/" + key + "/std",
                                         simple_value=std),
                        tf.Summary.Value(tag="eval/" + key + "/min",
                                         simple_value=minimum),
                        tf.Summary.Value(tag="eval/" + key + "/max",
                                         simple_value=maximum),
                        tf.Summary.Value(tag="eval/" + key + "/sum",
                                         simple_value=total),
                        tf.Summary.Value(tag="eval/" + key + "/initial",
                                         simple_value=value[0]),
                        tf.Summary.Value(tag="eval/" + key + "/final",
                                         simple_value=value[-1]),
                    ])
                summary = tf.Summary(value=summary_values)
                self.writer.add_summary(summary, self.total_steps + i)
                # Clear the episode_infos dictionary.
                self.episode_infos[i] = dict()

        self.total_steps += self.venv.num_envs
        return obs, rews, dones, infos

    def reset(self, **kwargs):
        """
        Invokes the reset method of the underlying environment, passing along
        any keywords.
        """
        return self.venv.reset(**kwargs)

    def close(self):
        """
        Closes the FileWriter and the underlying environment.
        """
        if self.writer is not None:
            self.writer.flush()
            self.writer.close()
        self.venv.close()
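A usage sketch for the wrapper above, assuming a Stable Baselines-style `DummyVecEnv`; the `"episode_length"` keyword is hypothetical and stands in for whatever keys the wrapped environments actually put in their info dicts:

import gym
from stable_baselines.common.vec_env import DummyVecEnv

venv = DummyVecEnv([lambda: gym.make("CartPole-v1") for _ in range(4)])
venv = TBVecEnvWrapper(venv, logdir="logs/tb_monitor",
                       info_keywords=("episode_length",))

obs = venv.reset()
for _ in range(1000):
    actions = [venv.action_space.sample() for _ in range(venv.num_envs)]
    obs, rewards, dones, infos = venv.step(actions)
venv.close()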
from tensorflow.python.framework.errors_impl import NotFoundError  # for the restore fallback below

saver = tf.train.Saver()

# Earlier loss candidates, kept for reference:
# cross_entropy = tf.reduce_mean(
#     -tf.reduce_sum(tf.square(y_ * tf.log(y)), reduction_indices=[1]))
# cross_entropy = tf.reduce_mean(
#     -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-2, 1.0))))
# cross_entropy = tf.reduce_mean(
#     tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
# cross_entropy = tf.losses.mean_pairwise_squared_error(y, y_)
cross_entropy = tf.reduce_mean(tf.abs(y_ - y))
accuracy = tf.reduce_mean(tf.abs(y - y_))
summary_accuracy = tf.summary.scalar("accuracy", accuracy)
train_step = tf.train.GradientDescentOptimizer(0.0007).minimize(cross_entropy)

fw = FileWriter("log", sess.graph)
sess.run(tf.global_variables_initializer())
try:
    saver.restore(sess, MODEL_NAME)
except NotFoundError:
    print("no model found, creating new model")
merged = tf.summary.merge_all()

for _ in range(0, 500):
    batch_x, batch_y = data_gen_batch(20)
    sess.run(train_step, feed_dict={x: batch_x, y_: batch_y})
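The fragment builds `merged` and `fw` but never writes a summary or saves the model; a sketch of the same loop with logging and checkpointing wired in. The explicit `step` counter is an assumption added for summary numbering:

for step in range(500):
    batch_x, batch_y = data_gen_batch(20)
    _, summ = sess.run([train_step, merged],
                       feed_dict={x: batch_x, y_: batch_y})
    fw.add_summary(summ, global_step=step)
fw.flush()
saver.save(sess, MODEL_NAME)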