Example #1
    def train(self):
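        """Main training loop: pull batches from the batch queue, take gradient
        steps on the model, and periodically update the target network, log
        statistics/timing, and save the policy."""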
        self._start_train_batch()

        logger.info('Training model')

        alg_args = self._params['alg']
        total_steps = int(alg_args['total_steps'])
        save_every_n_steps = int(alg_args['save_every_n_steps'])
        update_target_after_n_steps = int(
            alg_args['update_target_after_n_steps'])
        update_target_every_n_steps = int(
            alg_args['update_target_every_n_steps'])
        log_every_n_steps = int(alg_args['log_every_n_steps'])

        timeit.reset()
        timeit.start('total')
        save_itr = 0
        for step in range(total_steps):
            timeit.start('sample')
            # steps, observations, actions, rewards, dones, _ = self._replay_pool.sample(batch_size)
            steps, observations, actions, rewards, dones, _ = self._batch_queue.get()
            timeit.stop('sample')
            timeit.start('train')
            self._model.train_step(step,
                                   steps=steps,
                                   observations=observations,
                                   actions=actions,
                                   rewards=rewards,
                                   dones=dones,
                                   use_target=True)
            timeit.stop('train')

            ### update target network
            if step > update_target_after_n_steps and step % update_target_every_n_steps == 0:
                self._model.update_target()

            ### log
            if step > 0 and step % log_every_n_steps == 0:
                logger.record_tabular('Step', step)
                self._model.log()
                logger.dump_tabular(print_func=logger.info)

                timeit.stop('total')
                for line in str(timeit).split('\n'):
                    logger.debug(line)
                timeit.reset()
                timeit.start('total')

            ### save model
            if step > 0 and step % save_every_n_steps == 0:
                logger.info('Saving files for itr {0}'.format(save_itr))
                self._save_train_policy(save_itr)
                save_itr += 1

        ### always save the end
        self._save_train_policy(save_itr)

        self._stop_train_batch()
Example #2
    def _train_model(self):
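        # Train the traversability graph, periodically evaluating the holdout
        # cost, logging statistics, and saving.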
        for step in range(self._train_steps):
            self._trav_graph.train_step()

            if step % self._eval_every_n_steps == 0:
                self._trav_graph.holdout_cost()

            if step > 0 and step % self._log_every_n_steps == 0:
                logger.record_tabular('step', step)
                self._trav_graph.log()
                logger.dump_tabular(print_func=logger.info)

            if step > 0 and step % self._save_every_n_steps == 0:
                self._trav_graph.save()
Example #3
 def _run_log(self, step):
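     # Record tabular stats from the environment, replay pool(s), and policy,
     # dump them, and restart the timing counters.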
     logger.record_tabular('Step', step)
     self._env.log()
     self._replay_pool.log()
     if self._env_eval:
         self._env_eval.log(prefix='Eval')
     if self._replay_pool_eval:
         self._replay_pool_eval.log(prefix='Eval')
     self._policy.log()
     logger.dump_tabular(print_func=logger.info)
     timeit.stop('total')
     for line in str(timeit).split('\n'):
         logger.debug(line)
     timeit.reset()
     timeit.start('total')
Example #4
 def log(self):
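     # Record the mean of each accumulated statistic (plus the std for 'Depth'),
     # then clear the accumulators.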
     for k in sorted(self._log_stats.keys()):
         if k == 'Depth':
             logger.record_tabular(k + 'Mean', np.mean(self._log_stats[k]))
             logger.record_tabular(k + 'Std', np.std(self._log_stats[k]))
         else:
             logger.record_tabular(k, np.mean(self._log_stats[k]))
     self._log_stats.clear()
Example #5
    def train_model(self):
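        """Build a TraversabilityGraph and train it, periodically evaluating
        the holdout cost, logging, and saving."""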
        ### create graph
        trav_graph = TraversabilityGraph(self._obs_shape, self._save_folder,
                                         **labeller_params)

        train_steps = int(labeller_params['train_steps'])
        eval_every_n_steps = int(labeller_params['eval_every_n_steps'])
        log_every_n_steps = int(labeller_params['log_every_n_steps'])
        save_every_n_steps = int(labeller_params['save_every_n_steps'])

        for step in range(train_steps):
            trav_graph.train_step()

            if step % eval_every_n_steps == 0:
                trav_graph.holdout_cost()

            if step > 0 and step % log_every_n_steps == 0:
                logger.record_tabular('step', step)
                trav_graph.log()
                logger.dump_tabular(print_func=logger.info)

            if step > 0 and step % save_every_n_steps == 0:
                trav_graph.save()
Example #6
 def log(self, prefix=''):
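     # Record the mean and std of each accumulated statistic under the given
     # prefix, then reset the accumulators.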
     for key in sorted(self._log_stats.keys()):
         logger.record_tabular('{0}{1}Mean'.format(prefix, key), np.mean(self._log_stats[key]))
         logger.record_tabular('{0}{1}Std'.format(prefix, key), np.std(self._log_stats[key]))
     self._log_stats = defaultdict(list)
Example #7
    def log_pools(replay_pools, prefix=''):
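        """Aggregate log statistics across replay pools and record reward,
        episode-length, collision, and timing summaries."""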
        def join(l):
            return list(itertools.chain(*l))

        all_log_stats = [
            replay_pool.get_log_stats() for replay_pool in replay_pools
        ]
        log_stats = defaultdict(list)
        for k in all_log_stats[0].keys():
            log_stats[k] = join([ls[k] for ls in all_log_stats])
        logger.record_tabular(prefix + 'CumRewardMean',
                              np.mean(log_stats['CumReward']))
        logger.record_tabular(prefix + 'CumRewardStd',
                              np.std(log_stats['CumReward']))
        logger.record_tabular(prefix + 'AvgRewardMean',
                              np.mean(log_stats['AvgReward']))
        logger.record_tabular(prefix + 'AvgRewardStd',
                              np.std(log_stats['AvgReward']))
        logger.record_tabular(prefix + 'FinalRewardMean',
                              np.mean(log_stats['FinalReward']))
        logger.record_tabular(prefix + 'FinalRewardStd',
                              np.std(log_stats['FinalReward']))
        logger.record_tabular(prefix + 'EpisodeLengthMean',
                              np.mean(log_stats['EpisodeLength']))
        logger.record_tabular(prefix + 'EpisodeLengthStd',
                              np.std(log_stats['EpisodeLength']))

        logger.record_tabular(prefix + 'AvgCollision',
                              np.mean(log_stats['AvgCollision']))
        logger.record_tabular(prefix + 'NumEpisodes',
                              len(log_stats['EpisodeLength']))
        logger.record_tabular(prefix + 'Time', np.mean(log_stats['Time']))
Example #8
    def train(self):
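        """Off-policy training loop: collect samples into the replay buffer,
        periodically gather evaluation rollouts, take training steps on sampled
        batches, update the target network, log, and save."""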
        ### restore where we left off
        save_itr = self._restore()

        target_updated = False
        eval_rollouts = []

        self._sampler.reset()
        if self._eval_sampler is not None:
            self._eval_sampler.reset()

        timeit.reset()
        timeit.start('total')
        for step in range(0, self._total_steps, self._sampler.n_envs):
            ### sample and add to buffer
            if step > self._sample_after_n_steps:
                timeit.start('sample')
                self._sampler.step(
                    step,
                    take_random_actions=(step <= self._onpolicy_after_n_steps),
                    explore=True)
                timeit.stop('sample')

            ### sample and DON'T add to buffer (for validation)
            if self._eval_sampler is not None and step > 0 and step % self._eval_every_n_steps == 0:
                timeit.start('eval')
                for _ in range(self._rollouts_per_eval):
                    eval_rollouts_step = []
                    eval_step = step
                    while len(eval_rollouts_step) == 0:
                        self._eval_sampler.step(eval_step, explore=False)
                        eval_rollouts_step = self._eval_sampler.get_recent_paths()
                        eval_step += 1
                    eval_rollouts += eval_rollouts_step
                timeit.stop('eval')

            if step >= self._learn_after_n_steps:
                ### training step
                if self._train_every_n_steps >= 1:
                    if step % int(self._train_every_n_steps) == 0:
                        timeit.start('batch')
                        steps, observations, goals, actions, rewards, dones, _ = \
                            self._sampler.sample(self._batch_size)
                        timeit.stop('batch')
                        timeit.start('train')
                        self._policy.train_step(step,
                                                steps=steps,
                                                observations=observations,
                                                goals=goals,
                                                actions=actions,
                                                rewards=rewards,
                                                dones=dones,
                                                use_target=target_updated)
                        timeit.stop('train')
                else:
                    for _ in range(int(1. / self._train_every_n_steps)):
                        timeit.start('batch')
                        steps, observations, goals, actions, rewards, dones, _ = \
                            self._sampler.sample(self._batch_size)
                        timeit.stop('batch')
                        timeit.start('train')
                        self._policy.train_step(step,
                                                steps=steps,
                                                observations=observations,
                                                goals=goals,
                                                actions=actions,
                                                rewards=rewards,
                                                dones=dones,
                                                use_target=target_updated)
                        timeit.stop('train')

                ### update target network
                if step > self._update_target_after_n_steps and step % self._update_target_every_n_steps == 0:
                    self._policy.update_target()
                    target_updated = True

                ### log
                if step % self._log_every_n_steps == 0:
                    logger.record_tabular('Step', step)
                    self._sampler.log()
                    if self._eval_sampler is not None:
                        self._eval_sampler.log(prefix='Eval')
                    self._policy.log()
                    logger.dump_tabular(print_func=logger.info)
                    timeit.stop('total')
                    for line in str(timeit).split('\n'):
                        logger.debug(line)
                    timeit.reset()
                    timeit.start('total')

            ### save model
            if step > 0 and step % self._save_every_n_steps == 0:
                logger.info('Saving files for itr {0}'.format(save_itr))
                self._save(save_itr, self._sampler.get_recent_paths(),
                           eval_rollouts)
                save_itr += 1
                eval_rollouts = []

        self._save(save_itr, self._sampler.get_recent_paths(), eval_rollouts)
Example #9
    def inference(self):
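        """Inference loop for asynchronous training: collect rollouts with the
        sampler, periodically gather evaluation rollouts, log, save rollouts to
        disk, and reload the most recent policy checkpoint from the trainer."""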
        ### restore where we left off
        self._restore_inference()
        inference_itr = self._get_inference_itr()
        inference_step = self._get_inference_step()
        train_itr = self._get_train_itr()

        self._run_rsync()

        train_rollouts = []
        eval_rollouts = []

        self._inference_reset_sampler()

        timeit.reset()
        timeit.start('total')
        while True:
            train_step = self._get_train_step()
            if inference_step > self._total_steps:
                break

            ### sample and add to buffer
            if inference_step > self._sample_after_n_steps:
                timeit.start('sample')
                inference_step = self._inference_step(inference_step)
                timeit.stop('sample')
            else:
                inference_step += self._sampler.n_envs

            ### sample and DON'T add to buffer (for validation)
            if self._eval_sampler is not None and inference_step > 0 and inference_step % self._eval_every_n_steps == 0:
                timeit.start('eval')
                eval_rollouts_step = []
                eval_step = inference_step
                while len(eval_rollouts_step) == 0:
                    self._eval_sampler.step(eval_step, explore=False)
                    eval_rollouts_step = self._eval_sampler.get_recent_paths()
                    eval_step += 1
                eval_rollouts += eval_rollouts_step
                timeit.stop('eval')

            ### log
            if inference_step % self._log_every_n_steps == 0:
                logger.info('train itr {0:04d} inference itr {1:04d}'.format(
                    train_itr, inference_itr))
                logger.record_tabular('Train step', train_step)
                logger.record_tabular('Inference step', inference_step)
                self._sampler.log()
                if self._eval_sampler:
                    self._eval_sampler.log(prefix='Eval')
                logger.dump_tabular(print_func=logger.info)
                timeit.stop('total')
                for line in str(timeit).split('\n'):
                    logger.debug(line)
                timeit.reset()
                timeit.start('total')

            ### save rollouts / load model
            train_rollouts += self._sampler.get_recent_paths()
            if inference_step > 0 and inference_step % self._inference_save_every_n_steps == 0:
                self._inference_reset_sampler()

                ### save rollouts
                logger.debug('Saving files for itr {0}'.format(inference_itr))
                self._save_inference(inference_itr, train_rollouts,
                                     eval_rollouts)
                inference_itr += 1
                train_rollouts = []
                eval_rollouts = []

                ### load model
                with self._rsync_lock:  # to ensure the ckpt has been fully transferred over
                    new_train_itr = self._get_train_itr()
                    if train_itr < new_train_itr:
                        logger.debug(
                            'Loading policy for itr {0}'.format(new_train_itr - 1))
                        try:
                            self._policy.restore(
                                self._inference_policy_file_name(
                                    new_train_itr - 1),
                                train=False)
                            train_itr = new_train_itr
                        except Exception:
                            logger.debug(
                                'Failed to load model for itr {0}'.format(
                                    new_train_itr - 1))
                            self._policy.restore(
                                self._inference_policy_file_name(train_itr - 1),
                                train=False)
                            logger.debug('As backup, restored itr {0}'.format(
                                train_itr - 1))

        self._save_inference(inference_itr, self._sampler.get_recent_paths(),
                             eval_rollouts)
Example #10
    def train(self):
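        """Asynchronous training loop: sample batches from data gathered by the
        inference process, take training steps, update the target network, log,
        save, optionally reset the model, and load newly collected rollouts."""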
        ### restore where we left off
        init_inference_step = len(self._sampler)  # don't count offpolicy
        self._restore_train()
        train_itr = self._get_train_itr()
        train_step = self._get_train_step()
        inference_itr = self._get_inference_itr()

        target_updated = False

        timeit.reset()
        timeit.start('total')
        while True:
            inference_step = len(self._sampler) - init_inference_step
            if inference_step > self._total_steps or train_step > self._train_total_steps:
                break

            if inference_step >= self._learn_after_n_steps:
                ### training step
                train_step += 1
                timeit.start('batch')
                steps, observations, goals, actions, rewards, dones, _ = \
                    self._sampler.sample(self._batch_size)
                timeit.stop('batch')
                timeit.start('train')
                self._policy.train_step(train_step,
                                        steps=steps,
                                        observations=observations,
                                        goals=goals,
                                        actions=actions,
                                        rewards=rewards,
                                        dones=dones,
                                        use_target=target_updated)
                timeit.stop('train')

                ### update target network
                if train_step > self._update_target_after_n_steps and train_step % self._update_target_every_n_steps == 0:
                    self._policy.update_target()
                    target_updated = True

                ### log
                if train_step % self._log_every_n_steps == 0:
                    logger.info(
                        'train itr {0:04d} inference itr {1:04d}'.format(
                            train_itr, inference_itr))
                    logger.record_tabular('Train step', train_step)
                    logger.record_tabular('Inference step', inference_step)
                    self._policy.log()
                    logger.dump_tabular(print_func=logger.info)
                    timeit.stop('total')
                    for line in str(timeit).split('\n'):
                        logger.debug(line)
                    timeit.reset()
                    timeit.start('total')
            else:
                time.sleep(1)

            ### save model
            if train_step > 0 and train_step % self._train_save_every_n_steps == 0:
                logger.debug('Saving files for itr {0}'.format(train_itr))
                self._save_train(train_itr)
                train_itr += 1

            ### reset model
            if train_step > 0 and self._train_reset_every_n_steps is not None and \
                                    train_step % self._train_reset_every_n_steps == 0:
                logger.debug('Resetting model')
                self._policy.reset_weights()

            ### load data
            inference_itr = self._train_load_data(inference_itr)
Example #11
 def log(self):
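     # Record the mean of each accumulated statistic, then clear them.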
     for k in sorted(self._log_stats.keys()):
         logger.record_tabular(k, np.mean(self._log_stats[k]))
     self._log_stats.clear()
Example #12
    def log(self, prefix=''):
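        """Record reward, episode-length, and timing statistics under the given
        prefix, then reset the accumulators."""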
        self._log_stats['Time'] = [
            time.time() - self._last_get_log_stats_time
        ] if self._last_get_log_stats_time else [0.]

        logger.record_tabular(prefix + 'CumRewardMean',
                              np.mean(self._log_stats['CumReward']))
        logger.record_tabular(prefix + 'CumRewardStd',
                              np.std(self._log_stats['CumReward']))
        logger.record_tabular(prefix + 'AvgRewardMean',
                              np.mean(self._log_stats['AvgReward']))
        logger.record_tabular(prefix + 'AvgRewardStd',
                              np.std(self._log_stats['AvgReward']))
        logger.record_tabular(prefix + 'FinalRewardMean',
                              np.mean(self._log_stats['FinalReward']))
        logger.record_tabular(prefix + 'FinalRewardStd',
                              np.std(self._log_stats['FinalReward']))
        logger.record_tabular(prefix + 'EpisodeLengthMean',
                              np.mean(self._log_stats['EpisodeLength']))
        logger.record_tabular(prefix + 'EpisodeLengthStd',
                              np.std(self._log_stats['EpisodeLength']))

        logger.record_tabular(prefix + 'NumEpisodes',
                              len(self._log_stats['EpisodeLength']))
        logger.record_tabular(prefix + 'Time',
                              np.mean(self._log_stats['Time']))

        self._last_get_log_stats_time = time.time()
        self._log_stats = defaultdict(list)
Example #13
    def train(self):
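        """Asynchronous training loop: periodically update the policy's
        preprocessing statistics, take training steps on sampled batches,
        update the target network, log, save, optionally reset the model, and
        load new rollouts produced by the inference process."""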
        ### restore where we left off
        self._restore_train()
        train_itr = self._get_train_itr()
        train_step = self._get_train_step()
        inference_itr = self._get_inference_itr()
        init_inference_step = len(self._sampler)

        target_updated = False

        timeit.reset()
        timeit.start('total')
        while True:
            inference_step = len(self._sampler) - init_inference_step
            if inference_step > self._total_steps:
                break

            if inference_step >= self._learn_after_n_steps:
                ### update preprocess
                if train_step % self._update_preprocess_every_n_steps == 0:
                    self._policy.update_preprocess(self._sampler.statistics)

                ### training step
                train_step += 1
                timeit.start('batch')
                batch = self._sampler.sample(self._batch_size)
                timeit.stop('batch')
                timeit.start('train')
                self._policy.train_step(train_step,
                                        *batch,
                                        use_target=target_updated)
                timeit.stop('train')

                ### update target network
                if train_step > self._update_target_after_n_steps and train_step % self._update_target_every_n_steps == 0:
                    self._policy.update_target()
                    target_updated = True

                ### log
                if train_step % self._log_every_n_steps == 0:
                    logger.info(
                        'train itr {0:04d} inference itr {1:04d}'.format(
                            train_itr, inference_itr))
                    logger.record_tabular('Train step', train_step)
                    logger.record_tabular('Inference step', inference_step)
                    self._policy.log()
                    logger.dump_tabular(print_func=logger.info)
                    timeit.stop('total')
                    for line in str(timeit).split('\n'):
                        logger.debug(line)
                    timeit.reset()
                    timeit.start('total')
            else:
                time.sleep(1)

            ### save model
            if train_step > 0 and train_step % self._train_save_every_n_steps == 0:
                logger.debug('Saving files for itr {0}'.format(train_itr))
                self._save_train(train_itr)
                train_itr += 1

            ### reset model
            if train_step > 0 and self._train_reset_every_n_steps is not None and \
                                    train_step % self._train_reset_every_n_steps == 0:
                logger.debug('Resetting model')
                self._policy.reset_weights()

            ### load data
            new_inference_itr = self._get_inference_itr()
            if inference_itr < new_inference_itr:
                for i in range(inference_itr, new_inference_itr):
                    try:
                        logger.debug('Loading files for itr {0}'.format(i))
                        self._sampler.add_rollouts(
                            [self._train_rollouts_file_name(i)])
                        inference_itr = i + 1
                    except Exception:
                        logger.debug(
                            'Failed to load files for itr {0}'.format(i))
Example #14
    def inference(self):
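        """Inference loop for asynchronous training: step the sampler
        (recovering from sampler exceptions and waiting for ROS to become
        healthy), periodically gather evaluation rollouts, log, ask whether to
        keep rollouts before saving them, and reload the most recent policy
        checkpoint from the trainer."""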
        ### restore where we left off
        self._restore_inference()
        inference_itr = self._get_inference_itr()
        inference_step = self._get_inference_step()
        train_itr = self._get_train_itr()

        self._run_rsync()

        assert (self._eval_sampler is None)  # TODO: temporary
        train_rollouts = []
        eval_rollouts = []

        self._reset_sampler()

        timeit.reset()
        timeit.start('total')
        while True:
            train_step = self._get_train_step()
            if inference_step > self._total_steps:
                break

            ### sample and add to buffer
            if inference_step > self._sample_after_n_steps:
                timeit.start('sample')
                try:
                    self._sampler.step(
                        inference_step,
                        take_random_actions=(
                            inference_step <= self._learn_after_n_steps
                            or inference_step <= self._onpolicy_after_n_steps),
                        explore=True)
                    inference_step += self._sampler.n_envs
                except Exception as e:
                    logger.warn('Sampler exception {0}'.format(str(e)))
                    trashed_steps = self._sampler.trash_current_rollouts()
                    inference_step -= trashed_steps
                    logger.warn('Trashed {0} steps'.format(trashed_steps))
                    while not self._env.ros_is_good(
                            print=False):  # TODO hard coded
                        time.sleep(0.25)
                    self._reset_sampler()
                    logger.warn('Continuing...')
                timeit.stop('sample')
            else:
                inference_step += self._sampler.n_envs

            ### sample and DON'T add to buffer (for validation)
            if self._eval_sampler is not None and inference_step > 0 and inference_step % self._eval_every_n_steps == 0:
                timeit.start('eval')
                eval_rollouts_step = []
                eval_step = inference_step
                while len(eval_rollouts_step) == 0:
                    self._eval_sampler.step(eval_step, explore=False)
                    eval_rollouts_step = self._eval_sampler.get_recent_paths()
                    eval_step += 1
                eval_rollouts += eval_rollouts_step
                timeit.stop('eval')

            ### log
            if inference_step % self._log_every_n_steps == 0:
                logger.info('train itr {0:04d} inference itr {1:04d}'.format(
                    train_itr, inference_itr))
                logger.record_tabular('Train step', train_step)
                logger.record_tabular('Inference step', inference_step)
                self._sampler.log()
                if self._eval_sampler:
                    self._eval_sampler.log(prefix='Eval')
                logger.dump_tabular(print_func=logger.info)
                timeit.stop('total')
                for line in str(timeit).split('\n'):
                    logger.debug(line)
                timeit.reset()
                timeit.start('total')

            ### save rollouts / load model
            train_rollouts += self._sampler.get_recent_paths()
            if inference_step > 0 and inference_step % self._inference_save_every_n_steps == 0 and \
                            len(train_rollouts) > 0:
                response = input('Keep rollouts?')
                if response != 'y':
                    train_rollouts = []
                    continue

                ### reset to stop rollout
                self._sampler.reset()

                ### save rollouts
                logger.debug('Saving files for itr {0}'.format(inference_itr))
                self._save_inference(inference_itr, train_rollouts,
                                     eval_rollouts)
                inference_itr += 1
                train_rollouts = []
                eval_rollouts = []

                ### load model
                with self._rsync_lock:  # to ensure the ckpt has been fully transferred over
                    new_train_itr = self._get_train_itr()
                    if train_itr < new_train_itr:
                        logger.debug(
                            'Loading policy for itr {0}'.format(new_train_itr - 1))
                        try:
                            self._policy.restore(
                                self._inference_policy_file_name(
                                    new_train_itr - 1),
                                train=False)
                            train_itr = new_train_itr
                        except Exception:
                            logger.debug(
                                'Failed to load model for itr {0}'.format(
                                    new_train_itr - 1))
                            self._policy.restore(
                                self._inference_policy_file_name(train_itr - 1),
                                train=False)
                            logger.debug('As backup, restored itr {0}'.format(
                                train_itr - 1))

        self._save_inference(inference_itr, self._sampler.get_recent_paths(),
                             eval_rollouts)