Example #1
    def __init__(self, venv, logdir, info_keywords=(), **kwargs):
        """
        A monitor wrapper for vectorized Gym environments; it is used to record the episode reward, length, time and other data.

        :param venv: (VecEnv) the vectorized environment to wrap
        :param logdir: (str) the location to save tensorboard logs
        :param info_keywords: (tuple) extra information to log, from the information return of environment.step
        """
        VecEnvWrapper.__init__(self, venv=venv, **kwargs)
        self.writer = FileWriter(logdir)
        self.info_keywords = info_keywords
        self.episode_infos = [dict() for _ in range(self.venv.num_envs)]
        self.total_steps = 0
Example #2
    def __new__(cls, logdir):
        assert logdir is not None and logdir != "", \
            "need model_dir to initialize SummaryWriter"
        if SummaryWriter.__instance is None:
            SummaryWriter.__instance = super(SummaryWriter, cls).__new__(cls)
            fw = FileWriter(logdir, graph=ops.get_default_graph())
            setattr(SummaryWriter.__instance, "_summary_writer", fw)
            setattr(SummaryWriter.__instance, "add_graph", fw.add_graph)
            setattr(SummaryWriter.__instance, "add_meta_graph",
                    fw.add_meta_graph)
            setattr(SummaryWriter.__instance, "add_session_log",
                    fw.add_session_log)
        return SummaryWriter.__instance
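
A short usage sketch of the singleton above (illustrative only; it assumes the surrounding SummaryWriter class defines the `__instance` class attribute used in `__new__`):

writer_a = SummaryWriter("logs/run1")   # hypothetical log directory
writer_b = SummaryWriter("logs/run2")
assert writer_a is writer_b  # the second call returns the cached instance; its logdir is ignored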
Example #3
    def get(logdir):
        """Returns the FileWriter for the specified directory.

        Args:
          logdir: str, name of the directory.

        Returns:
          A `FileWriter`.
        """
        with FileWriterCache._lock:
            if logdir not in FileWriterCache._cache:
                FileWriterCache._cache[logdir] = FileWriter(
                    logdir, graph=ops.get_default_graph())
            return FileWriterCache._cache[logdir]
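
A brief illustration of the caching behaviour (assuming the full FileWriterCache class, not shown here, also defines the `_lock` and `_cache` class attributes used above):

w1 = FileWriterCache.get("logs/cached_run")   # hypothetical directory
w2 = FileWriterCache.get("logs/cached_run")
assert w1 is w2  # the second lookup reuses the FileWriter created by the first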
Example #4
    def __new__(cls, logdir):
        """ Creates a singleton instance.

        Args:
            logdir: A string, the directory for saving summaries.

        Returns: The instance.
        """
        assert logdir is not None and logdir != "", \
            "need model_dir to initialize SummaryWriter"
        if SummaryWriter.__instance is None:
            SummaryWriter.__instance = super(SummaryWriter, cls).__new__(cls)
            fw = FileWriter(logdir, graph=ops.get_default_graph())
            setattr(SummaryWriter.__instance, "_summary_writer", fw)
            setattr(SummaryWriter.__instance, "add_graph", fw.add_graph)
            setattr(SummaryWriter.__instance, "add_meta_graph",
                    fw.add_meta_graph)
            setattr(SummaryWriter.__instance, "add_session_log",
                    fw.add_session_log)
        return SummaryWriter.__instance
Example #5
class TensorBoardLogger:
    def __init__(self, path):
        self._writer = FileWriter(path, flush_secs=120)

    def put_start(self, global_step):
        self._writer.add_session_log(SessionLog(status=SessionLog.START),
                                     global_step)

    def put_scalar(self, k, v, global_step):
        self._writer.add_summary(
            Summary(value=[Summary.Value(tag=k, simple_value=float(v))]),
            global_step)

    def flush(self):
        self._writer.flush()
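
A short usage sketch for the logger above; the directory name and the scalar tag are illustrative, not from the original source:

logger = TensorBoardLogger("logs/experiment")
logger.put_start(global_step=0)                        # mark the start of the session
for step in range(3):
    logger.put_scalar("train/loss", 1.0 / (step + 1), global_step=step)
logger.flush()                                         # force the events file to disk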
Example #6
def dump_tensorboard_summary(graph_executor, logdir):
    with FileWriter(logdir) as w:
        pb_graph = visualize(graph_executor)
        evt = event_pb2.Event(wall_time=time.time(),
                              graph_def=pb_graph.SerializeToString())
        w.add_event(evt)
Example #7
    def __init__(self, path):
        self._writer = FileWriter(path, flush_secs=120)
Example #8
    def getSession(self):
        "dummy session"
        session = FileWriter("run/session.txt")
        return session
Example #9
    def __init__(self, scope, target_network, env, flags):
        """
        This class implements the Critic for the stochastic policy gradient model.
        The critic provides a state-value for the current state environment where 
        the agent operates.

        :param scope: within this scope the parameters will be defined
        :param target_network: instance of the Actor(target-network class)
        :param env: instance of the openAI environment
        :param FLAGS: TensorFlow flags which contain thevalues for hyperparameters

        """

        self.TF_FLAGS = flags
        self.env = env

        if scope == 'target':

            with tf.variable_scope(scope):

                self.states = tf.placeholder(tf.float32,
                                             shape=(None, self.env.state_size),
                                             name='states')
                self.actions = tf.placeholder(
                    tf.float32,
                    shape=(None, self.env.get_action_size()),
                    name='actions')
                self.q = self.create_network(scope='q_target_network')
                self.param = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES,
                    scope=scope + '/q_target_network')

        else:

            with tf.variable_scope(scope):

                # Add the target network instance
                self.target_network = target_network

                # Create the placeholders for the inputs to the network
                self.states = tf.placeholder(tf.float32,
                                             shape=(None,
                                                    self.env.get_state_size()),
                                             name='states')
                self.actions = tf.placeholder(
                    tf.float32,
                    shape=(None, self.env.get_action_size()),
                    name='actions')

                # Create the network with the goal of predicting the action-value function
                self.q = self.create_network(scope='q_network')
                self.q_targets = tf.placeholder(tf.float32,
                                                shape=(None, 1),
                                                name='q_targets')

                # The parameters of the network
                self.param = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES,
                    scope=scope + '/q_network')

                with tf.name_scope('q_network_loss'):
                    # Difference between targets value and calculated ones by the model
                    self.loss = tf.losses.mean_squared_error(
                        self.q_targets, self.q)

                with tf.name_scope('train_q_network'):
                    # Optimiser for the training of the critic network
                    self.train_opt = tf.train.AdamOptimizer(
                        self.TF_FLAGS.learning_rate_Critic).minimize(self.loss)

                with tf.name_scope('q_network_gradient'):
                    # Compute the gradients to be used for the actor model training
                    self.actor_loss = -tf.math.reduce_mean(self.q)
                    self.gradients = tf.gradients(self.actor_loss,
                                                  self.actions)

                with tf.name_scope('update_q_target'):
                    # Perform a soft update of the parameters: Critic network parameters = Local Parameters (LP) and Target network parameters (TP)
                    # TP = tau * LP + (1-tau) * TP
                    self.update_opt = [
                        tp.assign(
                            tf.multiply(self.TF_FLAGS.tau, lp) +
                            tf.multiply(1 - self.TF_FLAGS.tau, tp)) for tp, lp
                        in zip(self.target_network.param, self.param)
                    ]

                with tf.name_scope('initialize_q_target_network'):
                    # Set the parameters of the local network equal to the target one
                    # LP = TP
                    self.init_target_op = [
                        tp.assign(lp) for tp, lp in zip(
                            self.target_network.param, self.param)
                    ]

                FileWriter('logs/train', graph=self.train_opt.graph).close()
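
The soft update in the `update_q_target` scope implements TP = tau * LP + (1 - tau) * TP. A minimal NumPy sketch of the same rule, with made-up values, looks like this:

import numpy as np

tau = 0.01                              # interpolation factor (assumed value)
lp = np.array([1.0, 2.0, 3.0])          # local critic parameters
tp = np.zeros(3)                        # target network parameters
tp = tau * lp + (1.0 - tau) * tp        # target drifts slowly towards the local network
print(tp)                               # [0.01 0.02 0.03]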
Example #10
train_step = tf.train.AdamOptimizer().minimize(
    loss, global_step=tf.train.create_global_step())
tf.summary.scalar('loss', loss)

accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.greater(logits, 0), tf.cast(labels, tf.bool)),
            tf.float32))
tf.summary.scalar('accuracy', accuracy)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    th = sess.run(trn_itr.string_handle())
    vh = sess.run(vld_itr.string_handle())

    merged = tf.summary.merge_all()
    trn_writer = FileWriter(os.path.join(model_folder, 'train'), sess.graph)
    vld_writer = FileWriter(os.path.join(model_folder, 'validation'))
    saver = Saver()
    profiler = Profiler(sess.graph)
    opts = (option_builder.ProfileOptionBuilder(
        option_builder.ProfileOptionBuilder.trainable_variables_parameter()).
            with_file_output(os.path.join(model_folder,
                                          'profile_model.txt')).build())
    profiler.profile_name_scope(options=opts)

    value_lv = None
    lv = tf.Summary()
    lv.value.add(tag='loss', simple_value=value_lv)
    value_av = None
    av = tf.Summary()
    av.value.add(tag='accuracy', simple_value=value_av)
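
The excerpt stops after preparing the two Summary protos with their values still unset. A plausible continuation (illustrative, not taken from the original source) would fill in the values and hand the protos to the writers:

# value_lv and value_av are assumed to hold floats computed during evaluation.
lv.value[0].simple_value = float(value_lv)
av.value[0].simple_value = float(value_av)
step = sess.run(tf.train.get_global_step())
vld_writer.add_summary(lv, global_step=step)
vld_writer.add_summary(av, global_step=step)
vld_writer.flush()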
Example #11
class TBVecEnvWrapper(VecEnvWrapper):
    def __init__(self, venv, logdir, info_keywords=(), **kwargs):
        """
        A monitor wrapper for Gym environments, it is used to know the episode reward, length, time and other data.
        :param env: (Gym environment) The environment
        :param filename: (str) the location to save tensorboard logs
        :param info_keywords: (tuple) extra information to log, from the information return of environment.step
        """
        VecEnvWrapper.__init__(self, venv=venv, **kwargs)
        self.writer = FileWriter(logdir)
        self.info_keywords = info_keywords
        self.episode_infos = [dict() for _ in range(self.venv.num_envs)]
        self.total_steps = 0

    def step_wait(self):
        """
        Step the environment with the given action
        :param action: ([int] or [float]) the action
        :return: ([int] or [float], [float], [bool], dict) observation, reward, done, information
        """
        obs, rews, dones, infos = self.venv.step_wait()

        for i in range(self.venv.num_envs):
            for key in self.info_keywords:
                if key not in infos[i]:
                    break
                if key in self.episode_infos[i]:
                    self.episode_infos[i][key].append(infos[i][key])
                else:
                    self.episode_infos[i][key] = [infos[i][key]]

            if dones[i]:
                # Compute data summaries.
                summary_values = []
                for key, value in self.episode_infos[i].items():
                    mean = np.mean(value)
                    std = np.std(value)
                    minimum = np.min(value)
                    maximum = np.max(value)
                    total = np.sum(value)

                    summary_values.extend([
                        tf.Summary.Value(tag="eval/" + key + "/mean",
                                         simple_value=mean),
                        tf.Summary.Value(tag="eval/" + key + "/std",
                                         simple_value=std),
                        tf.Summary.Value(tag="eval/" + key + "/min",
                                         simple_value=minimum),
                        tf.Summary.Value(tag="eval/" + key + "/max",
                                         simple_value=maximum),
                        tf.Summary.Value(tag="eval/" + key + "/sum",
                                         simple_value=total),
                        tf.Summary.Value(tag="eval/" + key + "/initial",
                                         simple_value=value[0]),
                        tf.Summary.Value(tag="eval/" + key + "/final",
                                         simple_value=value[-1]),
                    ])
                summary = tf.Summary(value=summary_values)
                self.writer.add_summary(summary, self.total_steps + i)

                # Clear the episode_infos dictionary
                self.episode_infos[i] = dict()

        self.total_steps += self.venv.num_envs
        return obs, rews, dones, infos

    def reset(self, **kwargs):
        """
        Invokes the reset method of the underlying environment, passing along any keywords.
        """
        return self.venv.reset(**kwargs)

    def close(self):
        """
        Closes the FileWriter and the underlying environment.
        """
        if self.writer is not None:
            self.writer.flush()
            self.writer.close()

        self.venv.close()
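
A usage sketch for the wrapper above, assuming a stable-baselines style DummyVecEnv; the import paths, environment id and log directory are illustrative:

import gym
from stable_baselines.common.vec_env import DummyVecEnv

venv = DummyVecEnv([lambda: gym.make("CartPole-v1")])   # single vectorized environment
venv = TBVecEnvWrapper(venv, "logs/tb")                 # hypothetical log directory
obs = venv.reset()
obs, rewards, dones, infos = venv.step([venv.action_space.sample()])
venv.close()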
Example #12
saver = tf.train.Saver()

#cross_entropy = tf.reduce_mean(-tf.reduce_sum(tf.square(y_ * tf.log(y)), reduction_indices=[1]))
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_*tf.log(tf.clip_by_value(y,1e-2,1.0))))

# cross_entropy = tf.reduce_mean(
#       tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
# cross_entropy = tf.losses.mean_pairwise_squared_error(y, y_)
cross_entropy = tf.reduce_mean(tf.abs(y_ - y))


accuracy = tf.reduce_mean(tf.abs(y - y_))
summary_accuracy = tf.summary.scalar("accuracy", accuracy)
train_step = tf.train.GradientDescentOptimizer(0.0007).minimize(cross_entropy)
fw = FileWriter("log", sess.graph)


sess.run(tf.global_variables_initializer())
try:
    saver.restore(sess, MODEL_NAME)
except NotFoundError:
    print("no model found, creating new model")
merged = tf.summary.merge_all()

for _ in range(0, 500):

    batch_x, batch_y = data_gen_batch(20)

    sess.run(train_step, feed_dict={x: batch_x, y_: batch_y})
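
The loop above never uses `merged` or the FileWriter `fw`; an illustrative continuation (not from the original source) would evaluate the merged summaries once after training and write them out before saving:

# batch_x and batch_y still hold the last training batch here.
summary = sess.run(merged, feed_dict={x: batch_x, y_: batch_y})
fw.add_summary(summary, global_step=500)
fw.flush()
saver.save(sess, MODEL_NAME)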