Example 1
def time_used_wrap(name, func, *args, **kwargs):
    start_time = time.time()
    output = func(*args, **kwargs)
    end_time = time.time()
    time_used = end_time - start_time
    logger.info("[test] func {0} time used {1:.2f}".format(name, time_used))
    logger.record_tabular("time_used/{}".format(name), time_used)
    logger.dump_tabular()
    return output
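A minimal usage sketch (hypothetical; it assumes `time_used_wrap` and a configured RLA `logger` are already in scope):

# Hypothetical example: time an evaluation function and log its duration.
import time

def slow_eval(n_episodes):
    time.sleep(0.5)  # stand-in for an expensive evaluation
    return {"episodes": n_episodes}

stats = time_used_wrap("evaluation", slow_eval, n_episodes=10)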
Example 2
    def update_fph(self, cum_epochs):
        if self.last_record_fph_time is None:
            self.last_record_fph_time = time.time()
        else:
            cur_time = time.time()
            duration = (cur_time - self.last_record_fph_time) / 60 / 60
            fph = cum_epochs / duration
            logger.record_tabular('fph', fph)
            # self.last_record_fph_time = cur_time
            logger.dump_tabular()
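For reference, a self-contained sketch of the epochs-per-hour ("fph") quantity the method records, with `print` standing in for the RLA logger:

# Standalone sketch: epochs per hour measured from the first epoch.
import time

start_time = time.time()
for cum_epochs in range(1, 4):
    time.sleep(0.01)  # stand-in for one training epoch
    duration_hours = (time.time() - start_time) / 60 / 60
    print("fph", cum_epochs / duration_hours)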
Example 3
    def add_summary_to_logger(self,
                              summary,
                              name='',
                              simple_val=False,
                              freq=20):
        """
        [deprecated] see RLA.logger.log_from_tf_summary
        """
        logger.warn(
            "add_summary_to_logger is deprecated. See RLA.logger.log_from_tf_summary."
        )
        if "tensorboard" not in self.private_config["LOG_USED"]:
            logger.info("skip adding summary to tb")
            return
        if name not in self.summary_add_dict:
            self.summary_add_dict[name] = []
        if freq > 0:
            summary_ts = int(self.time_step_holder.get_time() / freq)
        else:
            summary_ts = 0
        if freq <= 0 or summary_ts not in self.summary_add_dict[name]:
            from tensorflow.core.framework import summary_pb2
            summ = summary_pb2.Summary()
            summ.ParseFromString(summary)
            if simple_val:
                list_field = summ.ListFields()

                def recursion_util(inp_field):
                    if hasattr(inp_field, "__getitem__"):
                        for inp in inp_field:
                            recursion_util(inp)
                    elif hasattr(inp_field, 'simple_value'):
                        logger.record_tabular(name + '/' + inp_field.tag,
                                              inp_field.simple_value)
                    else:
                        pass

                recursion_util(list_field)
                logger.dump_tabular()
            else:
                self.writer.add_summary(summary,
                                        self.time_step_holder.get_time())
                self.writer.flush()
            self.summary_add_dict[name].append(summary_ts)
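The `summary` argument is a serialized TensorFlow summary protobuf; a minimal sketch of producing one (assuming TF 1.x, and an `exp_manager` object exposing the method above):

# Hypothetical: build a serialized scalar summary and hand it to the method above.
import tensorflow as tf

summary_str = tf.Summary(
    value=[tf.Summary.Value(tag="loss", simple_value=0.42)]
).SerializeToString()
# exp_manager.add_summary_to_logger(summary_str, name="train", simple_val=True)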
Example 4
def total_episode_reward_logger(rew_acc, rewards, masks,
                                # writer,
                                steps):
    """
    Calculates the cumulative per-episode reward and records it via the logger.

    :param rew_acc: (np.array float) the total running reward
    :param rewards: (np.array float) the rewards
    :param masks: (np.array bool) the end of episodes
    # :param writer: (TensorFlow Session.writer) the writer to log to
    :param steps: (int) the current timestep
    :return: (np.array float) the updated total running reward
    """
    with tf.variable_scope("environment_info", reuse=True):
        for env_idx in range(rewards.shape[0]):
            dones_idx = np.sort(np.argwhere(masks[env_idx]))

            if len(dones_idx) == 0:
                rew_acc[env_idx] += sum(rewards[env_idx])
            else:
                rew_acc[env_idx] += sum(rewards[env_idx, :dones_idx[0, 0]])
                time_step_holder.set_time(steps + dones_idx[0, 0])
                # [RLA] phase 3: log
                logger.record_tabular('perf/episode_reward', rew_acc[env_idx])
                # summary = tf.Summary(value=[tf.Summary.Value(tag="episode_reward", simple_value=rew_acc[env_idx])])
                # writer.add_summary(summary, steps + dones_idx[0, 0])
                for k in range(1, len(dones_idx[:, 0])):
                    rew_acc[env_idx] = sum(rewards[env_idx, dones_idx[k - 1, 0]:dones_idx[k, 0]])
                    time_step_holder.set_time(steps + dones_idx[k, 0])
                    # [RLA] phase 3: log
                    logger.record_tabular('perf/episode_reward', rew_acc[env_idx])
                    # summary = tf.Summary(value=[tf.Summary.Value(tag="episode_reward", simple_value=rew_acc[env_idx])])
                    # writer.add_summary(summary, steps + dones_idx[k, 0])
                rew_acc[env_idx] = sum(rewards[env_idx, dones_idx[-1, 0]:])
                logger.dump_tabular()
    return rew_acc
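A hypothetical call with two environments and five steps per rollout (the RLA `logger` and `time_step_holder` are assumed to be configured, and a TF 1.x graph to be active as in the source):

# Toy inputs: env 0 finishes an episode at step 3, env 1 never finishes.
import numpy as np

rew_acc = np.zeros(2, dtype=np.float32)
rewards = np.random.rand(2, 5).astype(np.float32)
masks = np.zeros((2, 5), dtype=bool)
masks[0, 3] = True
rew_acc = total_episode_reward_logger(rew_acc, rewards, masks, steps=1000)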
Example 5
def target_func(x):
    return np.tanh(np.mean(x, axis=-1, keepdims=True))


for i in range(start_epoch, 1000):
    exp_manager.time_step_holder.set_time(i)
    x_input = np.random.normal(0, 3, [64, kwargs["input_size"]])
    y = target_func(x_input)
    loss_out, y_pred = sess.run([loss, out, opt],
                                feed_dict={
                                    X_ph: x_input,
                                    y_ph: y
                                })[:-1]
    logger.ma_record_tabular("perf/mse", loss_out, 10)
    logger.record_tabular("y_out", np.mean(y))
    logger.dump_tabular()
    if i % 100 == 0:
        exp_manager.save_checkpoint()
    if i % 10 == 0:

        def plot_func():
            import matplotlib.pyplot as plt
            testX = np.repeat(np.expand_dims(np.arange(-10, 10, 0.1), axis=-1),
                              repeats=16,
                              axis=-1)
            testY = target_func(testX)
            predY = sess.run(out, feed_dict={X_ph: testX})
            plt.plot(testX.mean(axis=-1), predY.mean(axis=-1), label='pred')
            plt.plot(testX.mean(axis=-1), testY.mean(axis=-1), label='real')

        mpr.pretty_plot_wrapper('react_func', plot_func)  # plot_func assumed to be the drawing callback; remaining keyword args are truncated in this snippet
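The loop above assumes a TF 1.x graph exposing `X_ph`, `y_ph`, `out`, `loss`, and `opt`; a minimal sketch of such a setup (layer sizes and the input size are assumptions, not taken from the source):

# Hypothetical graph setup matching the names used in the training loop.
import tensorflow as tf

input_size = 16  # assumed; the plotting code above repeats the input 16 times
X_ph = tf.placeholder(tf.float32, [None, input_size])
y_ph = tf.placeholder(tf.float32, [None, 1])
hidden = tf.layers.dense(X_ph, 64, activation=tf.nn.relu)
out = tf.layers.dense(hidden, 1, activation=tf.nn.tanh)
loss = tf.reduce_mean(tf.square(out - y_ph))
opt = tf.train.AdamOptimizer(1e-3).minimize(loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())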
Example 6
    def train(self,
              inputs,
              targets,
              batch_size=32,
              max_epochs=None,
              max_epochs_since_update=5,
              hide_progress=False,
              holdout_ratio=0.0,
              max_logging=1000,
              max_grad_updates=None,
              timer=None,
              max_t=None):
        """Trains/Continues network training

        Arguments:
            inputs (np.ndarray): Network inputs in the training dataset in rows.
            targets (np.ndarray): Network target outputs in the training dataset in rows corresponding
                to the rows in inputs.
            batch_size (int): The minibatch size to be used for training.
            max_epochs (int): Maximum number of epochs (full passes over the training data).
            hide_progress (bool): If True, hides the progress bar shown at the beginning of training.

        Returns: OrderedDict of validation metrics (currently 'val_loss', the mean
            holdout MSE over the top `num_elites` networks).
        """
        self._max_epochs_since_update = max_epochs_since_update
        self._start_train()
        break_train = False

        def shuffle_rows(arr):
            idxs = np.argsort(np.random.uniform(size=arr.shape), axis=-1)
            return arr[np.arange(arr.shape[0])[:, None], idxs]

        # Split into training and holdout sets
        num_holdout = min(int(inputs.shape[0] * holdout_ratio), max_logging)
        permutation = np.random.permutation(inputs.shape[0])
        inputs, holdout_inputs = inputs[permutation[num_holdout:]], inputs[
            permutation[:num_holdout]]
        targets, holdout_targets = targets[permutation[num_holdout:]], targets[
            permutation[:num_holdout]]
        holdout_inputs = np.tile(holdout_inputs[None], [self.num_nets, 1, 1])
        holdout_targets = np.tile(holdout_targets[None], [self.num_nets, 1, 1])

        print('[ BNN ] Training {} | Holdout: {}'.format(
            inputs.shape, holdout_inputs.shape))
        with self.sess.as_default():
            self.scaler.fit(inputs)

        idxs = np.random.randint(inputs.shape[0],
                                 size=[self.num_nets, inputs.shape[0]])
        if hide_progress:
            progress = Silent()
        else:
            progress = Progress(max_epochs)

        if max_epochs is not None:
            epoch_iter = range(max_epochs)
        else:
            epoch_iter = itertools.count()

        # else:
        #     epoch_range = trange(epochs, unit="epoch(s)", desc="Network training")

        t0 = time.time()
        grad_updates = 0
        for epoch in epoch_iter:
            tester.time_step_holder.set_time(epoch)
            for batch_num in range(int(np.ceil(idxs.shape[-1] / batch_size))):
                batch_idxs = idxs[:, batch_num * batch_size:(batch_num + 1) *
                                  batch_size]
                self.sess.run(self.train_op,
                              feed_dict={
                                  self.sy_train_in: inputs[batch_idxs],
                                  self.sy_train_targ: targets[batch_idxs]
                              })
                grad_updates += 1

            idxs = shuffle_rows(idxs)
            if not hide_progress:
                if holdout_ratio < 1e-12:
                    losses = self.sess.run(self.mse_loss,
                                           feed_dict={
                                               self.sy_train_in:
                                               inputs[idxs[:, :max_logging]],
                                               self.sy_train_targ:
                                               targets[idxs[:, :max_logging]]
                                           })
                    named_losses = [['M{}'.format(i), losses[i]]
                                    for i in range(len(losses))]
                    progress.set_description(named_losses)
                else:
                    losses = self.sess.run(self.mse_loss,
                                           feed_dict={
                                               self.sy_train_in:
                                               inputs[idxs[:, :max_logging]],
                                               self.sy_train_targ:
                                               targets[idxs[:, :max_logging]]
                                           })
                    holdout_losses = self.sess.run(self.mse_loss,
                                                   feed_dict={
                                                       self.sy_train_in:
                                                       holdout_inputs,
                                                       self.sy_train_targ:
                                                       holdout_targets
                                                   })
                    named_losses = [['M{}'.format(i), losses[i]]
                                    for i in range(len(losses))]
                    for i in range(len(losses)):
                        logger.ma_record_tabular("M/{}".format(i), losses[i],
                                                 100)
                        logger.ma_record_tabular("V/{}".format(i),
                                                 holdout_losses[i], 100)

                    named_holdout_losses = [[
                        'V{}'.format(i), holdout_losses[i]
                    ] for i in range(len(holdout_losses))]
                    named_losses = named_losses + named_holdout_losses + [[
                        'T', time.time() - t0
                    ]]
                    progress.set_description(named_losses)

                    break_train = self._save_best(epoch, holdout_losses)
            if epoch % 10 == 0:
                logger.dump_tabular()

            progress.update()
            t = time.time() - t0
            if break_train or (max_grad_updates
                               and grad_updates > max_grad_updates):
                break
            if max_t and t > max_t:
                descr = 'Breaking because of timeout: {}! (max: {})'.format(
                    t, max_t)
                progress.append_description(descr)
                # print('Breaking because of timeout: {}! | (max: {})\n'.format(t, max_t))
                # time.sleep(5)
                break

        progress.stamp()
        if timer: timer.stamp('bnn_train')

        self._set_state()
        if timer: timer.stamp('bnn_set_state')

        holdout_losses = self.sess.run(self.mse_loss,
                                       feed_dict={
                                           self.sy_train_in: holdout_inputs,
                                           self.sy_train_targ: holdout_targets
                                       })

        if timer: timer.stamp('bnn_holdout')

        self._end_train(holdout_losses)
        if timer: timer.stamp('bnn_end')

        val_loss = (np.sort(holdout_losses)[:self.num_elites]).mean()
        model_metrics = {'val_loss': val_loss}
        print('[ BNN ] Holdout', np.sort(holdout_losses), model_metrics)
        return OrderedDict(model_metrics)
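A hypothetical call on a constructed ensemble `model` (the dimensions and the 10% holdout split are assumptions for illustration):

# Train on random data and read back the validation metric.
import numpy as np

obs_dim, act_dim = 11, 3
inputs = np.random.randn(1000, obs_dim + act_dim).astype(np.float32)
targets = np.random.randn(1000, obs_dim + 1).astype(np.float32)  # e.g. next-obs delta + reward
metrics = model.train(inputs, targets, batch_size=256, max_epochs=20, holdout_ratio=0.1)
print(metrics["val_loss"])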