Example 1
def time_record_end(name):
    end_time = time.time()
    start_time = rc_start_time[name]
    logger.record_tabular("time_used/{}".format(name), end_time - start_time)
    logger.info("[test] func {0} time used {1:.2f}".format(
        name, end_time - start_time))
    del rc_start_time[name]
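This end-of-block helper pairs with a start function that stashes the start timestamp; a minimal usage sketch, assuming rc_start_time is a module-level dict and time_record is the (hypothetical) name of the counterpart:

import time

rc_start_time = {}  # assumed module-level registry of start timestamps

def time_record(name):
    # Assumed counterpart of time_record_end: remember when the block started.
    rc_start_time[name] = time.time()

# Bracket a piece of work and log its duration under "time_used/<name>".
time_record("sample_batch")
time.sleep(0.1)  # stand-in for the timed work
time_record_end("sample_batch")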
Example 2
def recursion_util(inp_field):
    if hasattr(inp_field, "__getitem__"):
        for inp in inp_field:
            recursion_util(inp)
    elif hasattr(inp_field, 'simple_value'):
        logger.record_tabular(name + '/' + inp_field.tag,
                              inp_field.simple_value)
    else:
        pass
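The simple_value/tag attributes suggest inp_field is ultimately a tf.Summary.Value, with name captured from the enclosing scope; a minimal usage sketch under that assumption (TensorFlow 1.x API):

import tensorflow as tf

name = "eval"  # assumed prefix captured from the enclosing scope

summary = tf.Summary(value=[
    tf.Summary.Value(tag="episode_reward", simple_value=123.4),
    tf.Summary.Value(tag="episode_length", simple_value=200.0),
])

# summary.value supports indexing, so the helper iterates over it and records
# each scalar as "eval/episode_reward", "eval/episode_length", ...
recursion_util(summary.value)
logger.dump_tabular()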
Example 3
def time_used_wrap(name, func, *args, **kwargs):
    start_time = time.time()
    output = func(*args, **kwargs)
    end_time = time.time()
    time_used = end_time - start_time
    logger.info("[test] func {0} time used {1:.2f}".format(name, time_used))
    logger.record_tabular("time_used/{}".format(name), time_used)
    logger.dump_tabular()
    return output
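A usage sketch, assuming logger is already configured; the wrapper forwards arbitrary arguments and returns the wrapped function's result unchanged:

import time

def train_one_epoch(batch_size, lr=1e-3):
    time.sleep(0.2)  # stand-in for the real work being timed
    return {"batch_size": batch_size, "lr": lr}

# Logs "time_used/train" to the console and the tabular log, then passes the
# return value straight through.
stats = time_used_wrap("train", train_one_epoch, 64, lr=3e-4)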
Example 4
def update_fph(self, cum_epochs):
    if self.last_record_fph_time is None:
        self.last_record_fph_time = time.time()
    else:
        cur_time = time.time()
        duration = (cur_time - self.last_record_fph_time) / 60 / 60
        fph = cum_epochs / duration
        logger.record_tabular('fph', fph)
        # self.last_record_fph_time = cur_time
        logger.dump_tabular()
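Because the reset of last_record_fph_time is commented out, the duration always runs from the first call, so cum_epochs should be the cumulative epoch count and 'fph' is an average epochs-per-hour rate. A usage sketch under that assumption (tracker, run_epoch and num_epochs are hypothetical names):

tracker.update_fph(0)              # first call only records the start time
cum_epochs = 0
for epoch in range(num_epochs):
    run_epoch()                    # placeholder for one epoch of training
    cum_epochs += 1
    tracker.update_fph(cum_epochs) # logs 'fph' = cum_epochs / hours elapsed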
Example 5
def total_episode_reward_logger(rew_acc, rewards, masks,
                                # writer,
                                steps):
    """
    calculates the cumulative episode reward and logs the result

    :param rew_acc: (np.array float) the total running reward
    :param rewards: (np.array float) the rewards
    :param masks: (np.array bool) the end of episodes
    # :param writer: (TensorFlow Session.writer) the writer to log to
    :param steps: (int) the current timestep
    :return: (np.array float) the updated total running reward
    """
    with tf.variable_scope("environment_info", reuse=True):
        for env_idx in range(rewards.shape[0]):
            dones_idx = np.sort(np.argwhere(masks[env_idx]))

            if len(dones_idx) == 0:
                rew_acc[env_idx] += sum(rewards[env_idx])
            else:
                rew_acc[env_idx] += sum(rewards[env_idx, :dones_idx[0, 0]])
                time_step_holder.set_time(steps + dones_idx[0, 0])
                # [RLA] phase 3: log
                logger.record_tabular('perf/episode_reward', rew_acc[env_idx])
                # summary = tf.Summary(value=[tf.Summary.Value(tag="episode_reward", simple_value=rew_acc[env_idx])])
                # writer.add_summary(summary, steps + dones_idx[0, 0])
                for k in range(1, len(dones_idx[:, 0])):
                    rew_acc[env_idx] = sum(rewards[env_idx, dones_idx[k - 1, 0]:dones_idx[k, 0]])
                    time_step_holder.set_time(steps + dones_idx[k, 0])
                    # [RLA] phase 3: log
                    logger.record_tabular('perf/episode_reward', rew_acc[env_idx])
                    # summary = tf.Summary(value=[tf.Summary.Value(tag="episode_reward", simple_value=rew_acc[env_idx])])
                    # writer.add_summary(summary, steps + dones_idx[k, 0])
                rew_acc[env_idx] = sum(rewards[env_idx, dones_idx[-1, 0]:])
                logger.dump_tabular()
    return rew_acc
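A minimal usage sketch with dummy data, assuming two parallel environments and the time_step_holder/logger already set up:

import numpy as np

n_envs, n_steps = 2, 5
rew_acc = np.zeros(n_envs, dtype=np.float32)
rewards = np.ones((n_envs, n_steps), dtype=np.float32)
masks = np.zeros((n_envs, n_steps), dtype=bool)
masks[0, 3] = True  # env 0 finishes an episode at step 3

# Logs 'perf/episode_reward' for the finished episode in env 0 and keeps
# accumulating the still-running episode in env 1.
rew_acc = total_episode_reward_logger(rew_acc, rewards, masks, steps=1000)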
Example 6
def print_custom_data(self, key, prefix=''):
    assert key in self.custom_data
    import numpy as np
    mean_val = np.mean(self.custom_data[key])
    logger.record_tabular(prefix + key, mean_val)
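A usage sketch, assuming custom_data is a dict of lists maintained on the same object (manager and the key name are hypothetical):

manager.custom_data.setdefault("q_loss", []).append(0.37)
manager.custom_data.setdefault("q_loss", []).append(0.41)

# Records the mean of the collected values under 'debug/q_loss'.
manager.print_custom_data("q_loss", prefix="debug/")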
Example 7
def target_func(x):
    return np.tanh(np.mean(x, axis=-1, keepdims=True))


for i in range(start_epoch, 1000):
    exp_manager.time_step_holder.set_time(i)
    x_input = np.random.normal(0, 3, [64, kwargs["input_size"]])
    y = target_func(x_input)
    loss_out, y_pred = sess.run([loss, out, opt],
                                feed_dict={
                                    X_ph: x_input,
                                    y_ph: y
                                })[:-1]
    logger.ma_record_tabular("perf/mse", loss_out, 10)
    logger.record_tabular("y_out", np.mean(y))
    logger.dump_tabular()
    if i % 100 == 0:
        exp_manager.save_checkpoint()
    if i % 10 == 0:

        def plot_func():
            import matplotlib.pyplot as plt
            testX = np.repeat(np.expand_dims(np.arange(-10, 10, 0.1), axis=-1),
                              repeats=16,
                              axis=-1)
            testY = target_func(testX)
            predY = sess.run(out, feed_dict={X_ph: testX})
            plt.plot(testX.mean(axis=-1), predY.mean(axis=-1), label='pred')
            plt.plot(testX.mean(axis=-1), testY.mean(axis=-1), label='real')