def train(self):
    """Run the main training loop.

    Starts the sampler workers and the optimizer, then iterates from the
    last completed iteration up to ``self.n_itr``: obtain samples,
    process them, optimize the policy, and save a snapshot each
    iteration.  Optionally visualizes rollouts after each iteration.
    """
    visualizer = Plotter()
    if self.plot:
        visualizer.init_plot(self.env, self.policy)
    self.start_worker()
    self.init_opt()
    for iteration in range(self.current_itr, self.n_itr):
        with logger.prefix('itr #{} | '.format(iteration)):
            raw_paths = self.sampler.obtain_samples(iteration)
            processed = self.sampler.process_samples(iteration, raw_paths)
            self.log_diagnostics(raw_paths)
            self.optimize_policy(iteration, processed)
            logger.log('Saving snapshot...')
            snapshot = self.get_itr_snapshot(iteration, processed)
            # Persist the resume point so a restart continues after this itr.
            self.current_itr = iteration + 1
            snapshot['algo'] = self
            if self.store_paths:
                snapshot['paths'] = processed['paths']
            snapshotter.save_itr_params(iteration, snapshot)
            logger.log('saved')
            logger.log(tabular)
            if self.plot:
                visualizer.update_plot(self.policy, self.max_path_length)
                if self.pause_for_plot:
                    input('Plotting evaluation run: Press Enter to '
                          'continue...')
    visualizer.close()
    self.shutdown_worker()
def train(self, n_epochs, n_epoch_cycles=1, batch_size=None, plot=False,
          store_paths=False, pause_for_plot=False):
    """Start training.

    Args:
        n_epochs: Number of epochs.
        n_epoch_cycles: Number of batches of samples in each epoch.
            This is only useful for off-policy algorithm. For on-policy
            algorithm this value should always be 1.
        batch_size: Number of steps in batch.
        plot: Visualize policy by doing rollout after each epoch.
        store_paths: Save paths in snapshot.
        pause_for_plot: Pause for plot.

    Returns:
        The average return in last epoch cycle.

    """
    assert self.has_setup, ('Use Runner.setup() to setup runner before '
                            'training.')
    if batch_size is None:
        # Off-policy samplers collect one path's worth of steps per
        # cycle; on-policy samplers default to a 40-path batch.
        from garage.tf.samplers import OffPolicyVectorizedSampler
        if isinstance(self.sampler, OffPolicyVectorizedSampler):
            batch_size = self.algo.max_path_length
        else:
            batch_size = 40 * self.algo.max_path_length

    self.n_epoch_cycles = n_epoch_cycles
    self.plot = plot

    self.start_worker()
    self.start_time = time.time()

    itr = 0
    last_return = None
    for epoch in range(n_epochs):
        self.itr_start_time = time.time()
        paths = None
        with logger.prefix('epoch #%d | ' % epoch):
            for _ in range(n_epoch_cycles):
                paths = self.obtain_samples(itr, batch_size)
                # BUG FIX: removed leftover debugging breakpoint
                # (`import pdb; pdb.set_trace()`) that halted training
                # on every cycle waiting for interactive input.
                paths = self.sampler.process_samples(itr, paths)
                last_return = self.algo.train_once(itr, paths)
                itr += 1
            # Only the last cycle's paths are snapshotted (if requested).
            self.save_snapshot(epoch, paths if store_paths else None)
            self.log_diagnostics(pause_for_plot)
            logger.dump_all(itr)
            tabular.clear()

    self.shutdown_worker()
    return last_return
def train(self, sess=None):
    """Train the policy while reporting lifecycle events to a monitor.

    Connects to a local monitoring process on port 6000 and sends an
    ``ExpLifecycle`` message before each phase (sampling, processing,
    optimization, plotting, shutdown).  The connection is closed even if
    training raises.

    Args:
        sess: An optional TensorFlow session to run in.  If None, a new
            session is created, entered and closed by this method.

    Returns:
        The ``average_return`` from the last processed batch of samples,
        or None if no iteration ran.
    """
    address = ("localhost", 6000)
    conn = Client(address)
    last_average_return = None
    try:
        # Idiom fix: was `True if (sess is None) else False` — a
        # redundant ternary over an already-boolean expression.
        # Remember whether we own the session so we only close our own.
        created_session = sess is None
        if sess is None:
            sess = tf.Session()
            sess.__enter__()
        sess.run(tf.global_variables_initializer())
        conn.send(ExpLifecycle.START)
        self.start_worker(sess)
        start_time = time.time()
        for itr in range(self.start_itr, self.n_itr):
            itr_start_time = time.time()
            with logger.prefix('itr #%d | ' % itr):
                logger.log("Obtaining samples...")
                conn.send(ExpLifecycle.OBTAIN_SAMPLES)
                paths = self.obtain_samples(itr)
                logger.log("Processing samples...")
                conn.send(ExpLifecycle.PROCESS_SAMPLES)
                samples_data = self.process_samples(itr, paths)
                last_average_return = samples_data["average_return"]
                logger.log("Logging diagnostics...")
                self.log_diagnostics(paths)
                logger.log("Optimizing policy...")
                conn.send(ExpLifecycle.OPTIMIZE_POLICY)
                self.optimize_policy(itr, samples_data)
                logger.log("Saving snapshot...")
                params = self.get_itr_snapshot(itr)
                if self.store_paths:
                    params["paths"] = samples_data["paths"]
                snapshotter.save_itr_params(itr, params)
                logger.log("Saved")
                tabular.record('Time', time.time() - start_time)
                tabular.record('ItrTime', time.time() - itr_start_time)
                logger.log(tabular)
                if self.plot:
                    conn.send(ExpLifecycle.UPDATE_PLOT)
                    self.plotter.update_plot(self.policy,
                                             self.max_path_length)
                    if self.pause_for_plot:
                        input("Plotting evaluation run: Press Enter to "
                              "continue...")
        conn.send(ExpLifecycle.SHUTDOWN)
        self.shutdown_worker()
        if created_session:
            sess.close()
    finally:
        conn.close()
    return last_average_return
def train(self):
    """Run the training loop while streaming lifecycle events.

    Connects to a local monitoring process on port 6000 and announces
    each phase (sampling, processing, optimization, plotting, shutdown)
    via ``ExpLifecycle`` messages; the connection is always closed, even
    when training raises.
    """
    monitor = Client(('localhost', 6000))
    try:
        visualizer = Plotter()
        if self.plot:
            visualizer.init_plot(self.env, self.policy)
        monitor.send(ExpLifecycle.START)
        self.start_worker()
        self.init_opt()
        for iteration in range(self.current_itr, self.n_itr):
            with logger.prefix('itr #{} | '.format(iteration)):
                monitor.send(ExpLifecycle.OBTAIN_SAMPLES)
                raw_paths = self.sampler.obtain_samples(iteration)
                monitor.send(ExpLifecycle.PROCESS_SAMPLES)
                processed = self.sampler.process_samples(
                    iteration, raw_paths)
                self.log_diagnostics(raw_paths)
                monitor.send(ExpLifecycle.OPTIMIZE_POLICY)
                self.optimize_policy(iteration, processed)
                logger.log('saving snapshot...')
                snapshot = self.get_itr_snapshot(iteration, processed)
                # Persist the resume point so a restart continues here.
                self.current_itr = iteration + 1
                snapshot['algo'] = self
                if self.store_paths:
                    snapshot['paths'] = processed['paths']
                snapshotter.save_itr_params(iteration, snapshot)
                logger.log('saved')
                logger.log(tabular)
                if self.plot:
                    monitor.send(ExpLifecycle.UPDATE_PLOT)
                    visualizer.update_plot(self.policy,
                                           self.max_path_length)
                    if self.pause_for_plot:
                        input('Plotting evaluation run: Press Enter to '
                              'continue...')
        monitor.send(ExpLifecycle.SHUTDOWN)
        visualizer.close()
        self.shutdown_worker()
    finally:
        monitor.close()