def train(self):
    """Run the main training loop over batches pulled from the batch queue.

    Reads loop hyperparameters from ``self._params['alg']``, repeatedly
    draws a pre-sampled batch from ``self._batch_queue``, performs one model
    train step per batch, and periodically updates the target network,
    dumps tabular logs, and saves the training policy.
    """
    # Start the background batch-producing machinery (defined elsewhere).
    self._start_train_batch()

    logger.info('Training model')

    alg_args = self._params['alg']
    total_steps = int(alg_args['total_steps'])
    save_every_n_steps = int(alg_args['save_every_n_steps'])
    update_target_after_n_steps = int(alg_args['update_target_after_n_steps'])
    update_target_every_n_steps = int(alg_args['update_target_every_n_steps'])
    log_every_n_steps = int(alg_args['log_every_n_steps'])

    timeit.reset()
    timeit.start('total')
    save_itr = 0
    for step in range(total_steps):
        timeit.start('sample')
        # Batches arrive asynchronously via the queue; the direct replay-pool
        # sampling below appears to be the superseded synchronous version.
        # steps, observations, actions, rewards, dones, _ = self._replay_pool.sample(batch_size)
        steps, observations, actions, rewards, dones, _ = self._batch_queue.get()
        timeit.stop('sample')
        timeit.start('train')
        self._model.train_step(step,
                               steps=steps,
                               observations=observations,
                               actions=actions,
                               rewards=rewards,
                               dones=dones,
                               use_target=True)
        timeit.stop('train')

        ### update target network
        if step > update_target_after_n_steps and step % update_target_every_n_steps == 0:
            self._model.update_target()

        ### log
        if step > 0 and step % log_every_n_steps == 0:
            logger.record_tabular('Step', step)
            self._model.log()
            logger.dump_tabular(print_func=logger.info)
            # Emit the timing breakdown for this window, then restart the
            # 'total' timer so each log interval is timed independently.
            timeit.stop('total')
            for line in str(timeit).split('\n'):
                logger.debug(line)
            timeit.reset()
            timeit.start('total')

        ### save model
        if step > 0 and step % save_every_n_steps == 0:
            logger.info('Saving files for itr {0}'.format(save_itr))
            self._save_train_policy(save_itr)
            save_itr += 1

    ### always save the end
    self._save_train_policy(save_itr)

    self._stop_train_batch()
def _train_model(self):
    """Train the traversability graph for ``self._train_steps`` iterations.

    Holdout evaluation, tabular logging, and checkpoint saving each fire on
    their own configured interval.
    """
    for itr in range(self._train_steps):
        self._trav_graph.train_step()

        # Holdout evaluation (also fires at itr == 0).
        if itr % self._eval_every_n_steps == 0:
            self._trav_graph.holdout_cost()

        # Logging and saving are both skipped at itr == 0.
        if itr and itr % self._log_every_n_steps == 0:
            logger.record_tabular('step', itr)
            self._trav_graph.log()
            logger.dump_tabular(print_func=logger.info)

        if itr and itr % self._save_every_n_steps == 0:
            self._trav_graph.save()
def _run_log(self, step):
    """Record diagnostics for *step*, dump the tabular log, and report timings.

    Restarts the 'total' timer afterwards so the next logging window is
    timed independently.
    """
    logger.record_tabular('Step', step)
    self._env.log()
    self._replay_pool.log()
    # Evaluation counterparts are optional; log them under the 'Eval' prefix.
    for eval_obj in (self._env_eval, self._replay_pool_eval):
        if eval_obj:
            eval_obj.log(prefix='Eval')
    self._policy.log()
    logger.dump_tabular(print_func=logger.info)

    # Emit the timing breakdown accumulated since the last log, then reset.
    timeit.stop('total')
    for timing_line in str(timeit).split('\n'):
        logger.debug(timing_line)
    timeit.reset()
    timeit.start('total')
def train_traversability_graph(save_folder, image_shape, trav_graph_params):
    """Train a TraversabilityGraph on pre-dumped image/label arrays.

    Loads train and holdout ``.npy`` arrays from *save_folder*, then runs
    ``train_steps`` minibatch updates, periodically evaluating holdout cost,
    dumping tabular logs, and saving the graph.

    :param save_folder: directory containing data_{train,holdout}_{images,labels}.npy
    :param image_shape: image shape forwarded to TraversabilityGraph
    :param trav_graph_params: dict of graph kwargs plus loop hyperparameters
        (train_steps, eval_every_n_steps, batch_size, log_every_n_steps,
        save_every_n_steps)
    """
    ### create graph
    trav_graph = TraversabilityGraph(image_shape, save_folder, **trav_graph_params)

    ### load data
    images = np.load(os.path.join(save_folder, 'data_train_images.npy'))
    labels = np.load(os.path.join(save_folder, 'data_train_labels.npy'))
    images_holdout = np.load(os.path.join(save_folder, 'data_holdout_images.npy'))
    labels_holdout = np.load(os.path.join(save_folder, 'data_holdout_labels.npy'))

    train_steps = int(trav_graph_params['train_steps'])
    eval_every_n_steps = int(trav_graph_params['eval_every_n_steps'])
    batch_size = trav_graph_params['batch_size']
    log_every_n_steps = int(trav_graph_params['log_every_n_steps'])
    save_every_n_steps = int(trav_graph_params['save_every_n_steps'])

    for step in range(train_steps):
        # Uniformly sampled (with replacement) training minibatch.
        indices = np.random.randint(low=0, high=len(images), size=batch_size)
        images_batch = images[indices]
        labels_batch = labels[indices]
        trav_graph.train_step(images_batch, labels_batch)

        # Holdout evaluation (also fires at step 0).
        if step % eval_every_n_steps == 0:
            indices = np.random.randint(low=0, high=len(images_holdout), size=batch_size)
            images_holdout_batch = images_holdout[indices]
            labels_holdout_batch = labels_holdout[indices]
            trav_graph.holdout_cost(images_holdout_batch, labels_holdout_batch)

        if step > 0 and step % log_every_n_steps == 0:
            # Record the step so the dumped table is indexed, consistent with
            # the other traversability-graph training loops in this file.
            logger.record_tabular('step', step)
            trav_graph.log()
            logger.dump_tabular(print_func=logger.info)

        if step > 0 and step % save_every_n_steps == 0:
            trav_graph.save()
def train_model(self):
    """Build a TraversabilityGraph and train it with periodic eval/log/save.

    NOTE(review): ``labeller_params`` is not defined in this method, is not
    accessed via ``self``, and no module-level definition is visible here —
    this looks like it should be ``self._labeller_params`` (or a global
    defined elsewhere in the file); confirm before relying on this method.
    """
    ### create graph
    trav_graph = TraversabilityGraph(self._obs_shape, self._save_folder, **labeller_params)

    train_steps = int(labeller_params['train_steps'])
    eval_every_n_steps = int(labeller_params['eval_every_n_steps'])
    log_every_n_steps = int(labeller_params['log_every_n_steps'])
    save_every_n_steps = int(labeller_params['save_every_n_steps'])

    for step in range(train_steps):
        trav_graph.train_step()

        # Holdout evaluation (also fires at step 0).
        if step % eval_every_n_steps == 0:
            trav_graph.holdout_cost()

        if step > 0 and step % log_every_n_steps == 0:
            logger.record_tabular('step', step)
            trav_graph.log()
            logger.dump_tabular(print_func=logger.info)

        if step > 0 and step % save_every_n_steps == 0:
            trav_graph.save()
def _log(self):
    """Dump eval-env, eval-replay-pool, and policy diagnostics to the tabular logger."""
    for loggable in (self._env_eval, self._replay_pool_eval, self._policy):
        loggable.log()
    logger.dump_tabular(print_func=logger.info)
def train(self):
    """Synchronous train loop: alternate env sampling, gradient steps, target
    updates, logging, and saving until ``self._total_steps`` env steps pass.

    NOTE(review): the log section calls ``self._eval_sampler.log(...)``
    without a None check, while every other use of ``self._eval_sampler``
    in this method is guarded — this will raise AttributeError when no
    eval sampler is configured; confirm intended.
    """
    ### restore where we left off
    save_itr = self._restore()
    target_updated = False
    eval_rollouts = []

    self._sampler.reset()
    if self._eval_sampler is not None:
        self._eval_sampler.reset()
    timeit.reset()
    timeit.start('total')
    # Each iteration advances the (vectorized) sampler by n_envs env steps.
    for step in range(0, self._total_steps, self._sampler.n_envs):
        ### sample and add to buffer
        if step > self._sample_after_n_steps:
            timeit.start('sample')
            self._sampler.step(step,
                               take_random_actions=(step <= self._onpolicy_after_n_steps),
                               explore=True)
            timeit.stop('sample')

        ### sample and DON'T add to buffer (for validation)
        if self._eval_sampler is not None and step > 0 and step % self._eval_every_n_steps == 0:
            timeit.start('eval')
            for _ in range(self._rollouts_per_eval):
                eval_rollouts_step = []
                eval_step = step
                # Keep stepping until at least one full rollout completes.
                while len(eval_rollouts_step) == 0:
                    self._eval_sampler.step(eval_step, explore=False)
                    eval_rollouts_step = self._eval_sampler.get_recent_paths()
                    eval_step += 1
                eval_rollouts += eval_rollouts_step
            timeit.stop('eval')

        if step >= self._learn_after_n_steps:
            ### training step
            if self._train_every_n_steps >= 1:
                # Interval >= 1: one gradient step every train_every_n_steps env steps.
                if step % int(self._train_every_n_steps) == 0:
                    timeit.start('batch')
                    steps, observations, goals, actions, rewards, dones, _ = \
                        self._sampler.sample(self._batch_size)
                    timeit.stop('batch')
                    timeit.start('train')
                    self._policy.train_step(step,
                                            steps=steps,
                                            observations=observations,
                                            goals=goals,
                                            actions=actions,
                                            rewards=rewards,
                                            dones=dones,
                                            use_target=target_updated)
                    timeit.stop('train')
            else:
                # Fractional interval: multiple gradient steps per loop iteration.
                for _ in range(int(1. / self._train_every_n_steps)):
                    timeit.start('batch')
                    steps, observations, goals, actions, rewards, dones, _ = \
                        self._sampler.sample(self._batch_size)
                    timeit.stop('batch')
                    timeit.start('train')
                    self._policy.train_step(step,
                                            steps=steps,
                                            observations=observations,
                                            goals=goals,
                                            actions=actions,
                                            rewards=rewards,
                                            dones=dones,
                                            use_target=target_updated)
                    timeit.stop('train')

            ### update target network
            if step > self._update_target_after_n_steps and step % self._update_target_every_n_steps == 0:
                self._policy.update_target()
                target_updated = True

            ### log
            if step % self._log_every_n_steps == 0:
                logger.record_tabular('Step', step)
                self._sampler.log()
                # NOTE(review): unguarded eval sampler access — see docstring.
                self._eval_sampler.log(prefix='Eval')
                self._policy.log()
                logger.dump_tabular(print_func=logger.info)
                # Emit timing breakdown for this window, then restart the timer.
                timeit.stop('total')
                for line in str(timeit).split('\n'):
                    logger.debug(line)
                timeit.reset()
                timeit.start('total')

        ### save model
        if step > 0 and step % self._save_every_n_steps == 0:
            logger.info('Saving files for itr {0}'.format(save_itr))
            self._save(save_itr, self._sampler.get_recent_paths(), eval_rollouts)
            save_itr += 1
            eval_rollouts = []

    # Final save so the last partial interval is not lost.
    self._save(save_itr, self._sampler.get_recent_paths(), eval_rollouts)
def inference(self):
    """Asynchronous inference loop: sample rollouts, periodically save them,
    and reload the latest policy checkpoint produced by the trainer process.

    NOTE(review): the bare ``except:`` below catches everything (including
    KeyboardInterrupt); presumably intended to catch checkpoint-restore
    failures only — confirm.
    """
    ### restore where we left off
    self._restore_inference()
    inference_itr = self._get_inference_itr()
    inference_step = self._get_inference_step()
    train_itr = self._get_train_itr()

    # Start background checkpoint syncing with the trainer.
    self._run_rsync()

    train_rollouts = []
    eval_rollouts = []

    self._inference_reset_sampler()

    timeit.reset()
    timeit.start('total')
    while True:
        train_step = self._get_train_step()
        if inference_step > self._total_steps:
            break

        ### sample and add to buffer
        if inference_step > self._sample_after_n_steps:
            timeit.start('sample')
            inference_step = self._inference_step(inference_step)
            timeit.stop('sample')
        else:
            # Not sampling yet; still advance the step counter by n_envs.
            inference_step += self._sampler.n_envs

        ### sample and DON'T add to buffer (for validation)
        if self._eval_sampler is not None and inference_step > 0 and inference_step % self._eval_every_n_steps == 0:
            timeit.start('eval')
            eval_rollouts_step = []
            eval_step = inference_step
            # Keep stepping until at least one full rollout completes.
            while len(eval_rollouts_step) == 0:
                self._eval_sampler.step(eval_step, explore=False)
                eval_rollouts_step = self._eval_sampler.get_recent_paths()
                eval_step += 1
            eval_rollouts += eval_rollouts_step
            timeit.stop('eval')

        ### log
        if inference_step % self._log_every_n_steps == 0:
            logger.info('train itr {0:04d} inference itr {1:04d}'.format(
                train_itr, inference_itr))
            logger.record_tabular('Train step', train_step)
            logger.record_tabular('Inference step', inference_step)
            self._sampler.log()
            if self._eval_sampler:
                self._eval_sampler.log(prefix='Eval')
            logger.dump_tabular(print_func=logger.info)
            # Emit timing breakdown for this window, then restart the timer.
            timeit.stop('total')
            for line in str(timeit).split('\n'):
                logger.debug(line)
            timeit.reset()
            timeit.start('total')

        ### save rollouts / load model
        train_rollouts += self._sampler.get_recent_paths()
        if inference_step > 0 and inference_step % self._inference_save_every_n_steps == 0:
            self._inference_reset_sampler()

            ### save rollouts
            logger.debug('Saving files for itr {0}'.format(inference_itr))
            self._save_inference(inference_itr, train_rollouts, eval_rollouts)
            inference_itr += 1
            train_rollouts = []
            eval_rollouts = []

            ### load model
            with self._rsync_lock:  # to ensure the ckpt has been fully transferred over
                new_train_itr = self._get_train_itr()
                if train_itr < new_train_itr:
                    logger.debug('Loading policy for itr {0}'.format(new_train_itr - 1))
                    try:
                        self._policy.restore(
                            self._inference_policy_file_name(new_train_itr - 1),
                            train=False)
                        train_itr = new_train_itr
                    except:
                        # Restore failed (e.g. partial checkpoint); fall back
                        # to the last checkpoint known to be good.
                        logger.debug('Failed to load model for itr {0}'.format(
                            new_train_itr - 1))
                        self._policy.restore(
                            self._inference_policy_file_name(train_itr - 1),
                            train=False)
                        logger.debug('As backup, restored itr {0}'.format(
                            train_itr - 1))

    # Final save so the last partial interval is not lost.
    self._save_inference(inference_itr, self._sampler.get_recent_paths(), eval_rollouts)
def train(self):
    """Asynchronous train loop: take gradient steps on data produced by a
    separate inference process, saving/resetting the model and loading new
    rollout data on their configured intervals.

    NOTE(review): the save-model check runs every loop iteration even when
    no training happened (the ``time.sleep(1)`` branch), so a restored
    ``train_step`` that divides ``_train_save_every_n_steps`` would re-save
    on every iteration while waiting for data — confirm intended.
    """
    ### restore where we left off
    init_inference_step = len(self._sampler)  # don't count offpolicy
    self._restore_train()
    train_itr = self._get_train_itr()
    train_step = self._get_train_step()
    inference_itr = self._get_inference_itr()

    target_updated = False

    timeit.reset()
    timeit.start('total')
    while True:
        # Env steps gathered since startup, excluding pre-loaded offpolicy data.
        inference_step = len(self._sampler) - init_inference_step
        if inference_step > self._total_steps or train_step > self._train_total_steps:
            break

        if inference_step >= self._learn_after_n_steps:
            ### training step
            train_step += 1
            timeit.start('batch')
            steps, observations, goals, actions, rewards, dones, _ = \
                self._sampler.sample(self._batch_size)
            timeit.stop('batch')
            timeit.start('train')
            self._policy.train_step(train_step,
                                    steps=steps,
                                    observations=observations,
                                    goals=goals,
                                    actions=actions,
                                    rewards=rewards,
                                    dones=dones,
                                    use_target=target_updated)
            timeit.stop('train')

            ### update target network
            if train_step > self._update_target_after_n_steps and train_step % self._update_target_every_n_steps == 0:
                self._policy.update_target()
                target_updated = True

            ### log
            if train_step % self._log_every_n_steps == 0:
                logger.info('train itr {0:04d} inference itr {1:04d}'.format(
                    train_itr, inference_itr))
                logger.record_tabular('Train step', train_step)
                logger.record_tabular('Inference step', inference_step)
                self._policy.log()
                logger.dump_tabular(print_func=logger.info)
                # Emit timing breakdown for this window, then restart the timer.
                timeit.stop('total')
                for line in str(timeit).split('\n'):
                    logger.debug(line)
                timeit.reset()
                timeit.start('total')
        else:
            # Not enough data yet; avoid busy-waiting.
            time.sleep(1)

        ### save model
        if train_step > 0 and train_step % self._train_save_every_n_steps == 0:
            logger.debug('Saving files for itr {0}'.format(train_itr))
            self._save_train(train_itr)
            train_itr += 1

        ### reset model
        if train_step > 0 and self._train_reset_every_n_steps is not None and \
                train_step % self._train_reset_every_n_steps == 0:
            logger.debug('Resetting model')
            self._policy.reset_weights()

        ### load data
        inference_itr = self._train_load_data(inference_itr)
def train(self):
    """Asynchronous train loop (preprocess-updating variant): take gradient
    steps on data produced by a separate inference process, periodically
    updating input preprocessing statistics, saving/resetting the model, and
    loading newly saved rollout files.

    NOTE(review): the bare ``except:`` in the data-loading section catches
    everything; presumably intended for missing/partial rollout files only —
    confirm.
    """
    ### restore where we left off
    self._restore_train()
    train_itr = self._get_train_itr()
    train_step = self._get_train_step()
    inference_itr = self._get_inference_itr()
    init_inference_step = len(self._sampler)

    target_updated = False

    timeit.reset()
    timeit.start('total')
    while True:
        # Env steps gathered since startup, excluding data present at start.
        inference_step = len(self._sampler) - init_inference_step
        if inference_step > self._total_steps:
            break

        if inference_step >= self._learn_after_n_steps:
            ### update preprocess
            if train_step % self._update_preprocess_every_n_steps == 0:
                self._policy.update_preprocess(self._sampler.statistics)

            ### training step
            train_step += 1
            timeit.start('batch')
            batch = self._sampler.sample(self._batch_size)
            timeit.stop('batch')
            timeit.start('train')
            self._policy.train_step(train_step, *batch, use_target=target_updated)
            timeit.stop('train')

            ### update target network
            if train_step > self._update_target_after_n_steps and train_step % self._update_target_every_n_steps == 0:
                self._policy.update_target()
                target_updated = True

            ### log
            if train_step % self._log_every_n_steps == 0:
                logger.info('train itr {0:04d} inference itr {1:04d}'.format(
                    train_itr, inference_itr))
                logger.record_tabular('Train step', train_step)
                logger.record_tabular('Inference step', inference_step)
                self._policy.log()
                logger.dump_tabular(print_func=logger.info)
                # Emit timing breakdown for this window, then restart the timer.
                timeit.stop('total')
                for line in str(timeit).split('\n'):
                    logger.debug(line)
                timeit.reset()
                timeit.start('total')
        else:
            # Not enough data yet; avoid busy-waiting.
            time.sleep(1)

        ### save model
        if train_step > 0 and train_step % self._train_save_every_n_steps == 0:
            logger.debug('Saving files for itr {0}'.format(train_itr))
            self._save_train(train_itr)
            train_itr += 1

        ### reset model
        if train_step > 0 and self._train_reset_every_n_steps is not None and \
                train_step % self._train_reset_every_n_steps == 0:
            logger.debug('Resetting model')
            self._policy.reset_weights()

        ### load data
        # Pull in any rollout files the inference process has saved since
        # the last check; skip (but do not advance past) files that fail.
        new_inference_itr = self._get_inference_itr()
        if inference_itr < new_inference_itr:
            for i in range(inference_itr, new_inference_itr):
                try:
                    logger.debug('Loading files for itr {0}'.format(i))
                    self._sampler.add_rollouts(
                        [self._train_rollouts_file_name(i)])
                    inference_itr = i + 1
                except:
                    logger.debug('Failed to load files for itr {0}'.format(i))
def inference(self):
    """Asynchronous inference loop for a (ROS-connected) robot: sample
    rollouts with crash recovery, interactively confirm which rollouts to
    keep, save them, and reload the trainer's latest policy checkpoint.

    NOTE(review): the bare ``except:`` in the model-loading section catches
    everything; presumably intended for checkpoint-restore failures only —
    confirm. This loop also blocks on ``input(...)`` at each save interval,
    so it is intended for attended operation.
    """
    ### restore where we left off
    self._restore_inference()
    inference_itr = self._get_inference_itr()
    inference_step = self._get_inference_step()
    train_itr = self._get_train_itr()

    # Start background checkpoint syncing with the trainer.
    self._run_rsync()

    assert (self._eval_sampler is None)  # TODO: temporary

    train_rollouts = []
    eval_rollouts = []

    self._reset_sampler()

    timeit.reset()
    timeit.start('total')
    while True:
        train_step = self._get_train_step()
        if inference_step > self._total_steps:
            break

        ### sample and add to buffer
        if inference_step > self._sample_after_n_steps:
            timeit.start('sample')
            try:
                self._sampler.step(
                    inference_step,
                    take_random_actions=(
                        inference_step <= self._learn_after_n_steps or
                        inference_step <= self._onpolicy_after_n_steps),
                    explore=True)
                inference_step += self._sampler.n_envs
            except Exception as e:
                # Sampling failed (e.g. ROS dropout): discard the partial
                # rollout, wait for ROS to recover, and resume sampling.
                logger.warn('Sampler exception {0}'.format(str(e)))
                trashed_steps = self._sampler.trash_current_rollouts()
                inference_step -= trashed_steps
                logger.warn('Trashed {0} steps'.format(trashed_steps))
                while not self._env.ros_is_good(print=False):  # TODO hard coded
                    time.sleep(0.25)
                self._reset_sampler()
                logger.warn('Continuing...')
            timeit.stop('sample')
        else:
            # Not sampling yet; still advance the step counter by n_envs.
            inference_step += self._sampler.n_envs

        ### sample and DON'T add to buffer (for validation)
        if self._eval_sampler is not None and inference_step > 0 and inference_step % self._eval_every_n_steps == 0:
            timeit.start('eval')
            eval_rollouts_step = []
            eval_step = inference_step
            # Keep stepping until at least one full rollout completes.
            while len(eval_rollouts_step) == 0:
                self._eval_sampler.step(eval_step, explore=False)
                eval_rollouts_step = self._eval_sampler.get_recent_paths()
                eval_step += 1
            eval_rollouts += eval_rollouts_step
            timeit.stop('eval')

        ### log
        if inference_step % self._log_every_n_steps == 0:
            logger.info('train itr {0:04d} inference itr {1:04d}'.format(
                train_itr, inference_itr))
            logger.record_tabular('Train step', train_step)
            logger.record_tabular('Inference step', inference_step)
            self._sampler.log()
            if self._eval_sampler:
                self._eval_sampler.log(prefix='Eval')
            logger.dump_tabular(print_func=logger.info)
            # Emit timing breakdown for this window, then restart the timer.
            timeit.stop('total')
            for line in str(timeit).split('\n'):
                logger.debug(line)
            timeit.reset()
            timeit.start('total')

        ### save rollouts / load model
        train_rollouts += self._sampler.get_recent_paths()
        if inference_step > 0 and inference_step % self._inference_save_every_n_steps == 0 and \
                len(train_rollouts) > 0:
            # Operator decides whether the gathered rollouts are usable.
            response = input('Keep rollouts?')
            if response != 'y':
                train_rollouts = []
                continue

            ### reset to stop rollout
            self._sampler.reset()

            ### save rollouts
            logger.debug('Saving files for itr {0}'.format(inference_itr))
            self._save_inference(inference_itr, train_rollouts, eval_rollouts)
            inference_itr += 1
            train_rollouts = []
            eval_rollouts = []

            ### load model
            with self._rsync_lock:  # to ensure the ckpt has been fully transferred over
                new_train_itr = self._get_train_itr()
                if train_itr < new_train_itr:
                    logger.debug('Loading policy for itr {0}'.format(new_train_itr - 1))
                    try:
                        self._policy.restore(
                            self._inference_policy_file_name(new_train_itr - 1),
                            train=False)
                        train_itr = new_train_itr
                    except:
                        # Restore failed (e.g. partial checkpoint); fall back
                        # to the last checkpoint known to be good.
                        logger.debug('Failed to load model for itr {0}'.format(
                            new_train_itr - 1))
                        self._policy.restore(
                            self._inference_policy_file_name(train_itr - 1),
                            train=False)
                        logger.debug('As backup, restored itr {0}'.format(
                            train_itr - 1))

    # Final save so the last partial interval is not lost.
    self._save_inference(inference_itr, self._sampler.get_recent_paths(), eval_rollouts)