def train(self, args):
    """Run the training loop, optionally resuming from a saved checkpoint.

    Checkpoints are written to and read from the ``checkpoints`` directory
    under ``logger.get_dir()``. Every reported update is logged, and every
    update whose (offset) ``n_updates`` is a multiple of 10 is saved under a
    zero-padded five-digit file name.

    The loop ends once ``self.agent.rollout.stats['tcount']`` exceeds
    ``self.num_timesteps``. ``self.agent.stop_interaction()`` is always
    called once interaction has started, even if setup, stepping, logging
    or saving raises.

    Args:
        args: Mapping with the key ``'load_weights'`` (checkpoint file name
            inside the checkpoint directory, or ``None`` to start fresh).
            The name must parse as an integer because it is also used as
            the number of updates already performed. The keys
            ``'nsteps_per_seg'`` and ``'envs_per_process'`` are read on
            each reported update to offset the logged ``tcount``.

    Raises:
        ValueError: If ``args['load_weights']`` is not ``None`` and cannot
            be converted with ``int()``.
    """
    self.agent.start_interaction(
        self.envs, nlump=self.hps['nlumps'], dynamics=self.dynamics)
    try:
        sess = tf.get_default_session()
        self.save = functools.partial(save_variables, sess=sess)
        self.load = functools.partial(load_variables, sess=sess)

        checkdir = osp.join(logger.get_dir(), 'checkpoints')
        os.makedirs(checkdir, exist_ok=True)

        load_weights = args['load_weights']
        start_nupdates = 0
        if load_weights is not None:
            load_path = osp.join(checkdir, load_weights)
            # The checkpoint name doubles as the update counter to resume from.
            start_nupdates = int(load_weights)
            print('Loading checkpoint from %s ' % load_weights)
            self.load(load_path)

        while True:
            info = self.agent.step()
            update = info['update']
            if update:
                # Shift the reported counters so logs and checkpoint names
                # continue from the loaded checkpoint instead of restarting.
                update['n_updates'] += start_nupdates
                update['tcount'] += (
                    start_nupdates * args['nsteps_per_seg'] * args['envs_per_process'])
                logger.logkvs(update)
                logger.dumpkvs()
                if update['n_updates'] % 10 == 0:
                    savepath = osp.join(checkdir, '%.5i' % update['n_updates'])
                    print('Saving to', savepath)
                    self.save(savepath)
            # NOTE(review): this stop condition reads the rollout's own
            # counter, which does not include the resume offset applied to
            # the logged values above -- confirm that is intended.
            if self.agent.rollout.stats['tcount'] > self.num_timesteps:
                break
    finally:
        # Release whatever start_interaction() acquired, even on error.
        self.agent.stop_interaction()
def train(self):
    """Step the agent until the rollout's timestep budget is exhausted.

    After each ``self.agent.step()``, a truthy ``info['update']`` is passed
    to ``logger.logkvs`` and flushed with ``logger.dumpkvs``. The loop ends
    once ``self.agent.rollout.stats['tcount']`` exceeds
    ``self.num_timesteps``.

    ``self.agent.stop_interaction()`` is always called once interaction has
    started, even if a step or the logging raises; the exception then
    propagates to the caller.
    """
    self.agent.start_interaction(
        self.envs, nlump=self.hps['nlumps'], dynamics=self.dynamics)
    try:
        while True:
            info = self.agent.step()
            update = info['update']
            if update:
                logger.logkvs(update)
                logger.dumpkvs()
            if self.agent.rollout.stats['tcount'] > self.num_timesteps:
                break
    finally:
        # Release whatever start_interaction() acquired, even on error.
        self.agent.stop_interaction()
def train(self):
    """Run the training loop with optional model restore and periodic saves.

    If ``self.hps['ckptpath']`` is not ``None``, the agent's model is
    restored from it (with ``self.hps['exp_name']``) before stepping. Each
    truthy ``info['update']`` is logged through ``logger``; whenever its
    ``n_updates`` is a multiple of 60 the model is saved to
    ``logger.get_dir()`` with ``global_step`` set to that counter.

    The loop ends once ``self.agent.rollout.stats['tcount']`` exceeds
    ``self.num_timesteps``. ``self.agent.stop_interaction()`` is always
    called once interaction has started, even if restoring, stepping,
    logging or saving raises.
    """
    self.agent.start_interaction(
        self.envs, nlump=self.hps['nlumps'], dynamics=self.dynamics)
    try:
        if self.hps['ckptpath'] is not None:
            self.agent.restore_model(
                logdir=self.hps['ckptpath'], exp_name=self.hps['exp_name'])

        while True:
            info = self.agent.step()
            update = info['update']
            if update:
                logger.logkvs(update)
                logger.dumpkvs()
                if update['n_updates'] % 60 == 0:
                    self.agent.save_model(
                        logdir=logger.get_dir(),
                        exp_name=self.hps['exp_name'],
                        global_step=update['n_updates'])
            if self.agent.rollout.stats['tcount'] > self.num_timesteps:
                break
    finally:
        # Release whatever start_interaction() acquired, even on error.
        self.agent.stop_interaction()
def train(self):
    """Run the training loop, checkpointing variables every 10 updates.

    ``self.save`` is bound to ``save_variables`` with the default TensorFlow
    session. Each truthy ``info['update']`` is logged through ``logger``;
    whenever its ``n_updates`` is a multiple of 10 the variables are saved
    under ``<logger dir>/checkpoints`` using a zero-padded five-digit file
    name. The directory is created on demand at save time.

    The loop ends once ``self.agent.rollout.stats['tcount']`` exceeds
    ``self.num_timesteps``. ``self.agent.stop_interaction()`` is always
    called once interaction has started, even if stepping, logging or
    saving raises.
    """
    self.agent.start_interaction(
        self.envs, nlump=self.hps['nlumps'], dynamics=self.dynamics)
    try:
        sess = tf.get_default_session()
        self.save = functools.partial(save_variables, sess=sess)

        while True:
            info = self.agent.step()
            update = info['update']
            if update:
                logger.logkvs(update)
                logger.dumpkvs()
                if update['n_updates'] % 10 == 0:
                    # Created lazily so no directory appears unless a
                    # checkpoint is actually written.
                    checkdir = osp.join(logger.get_dir(), 'checkpoints')
                    os.makedirs(checkdir, exist_ok=True)
                    savepath = osp.join(checkdir, '%.5i' % update['n_updates'])
                    print('Saving to', savepath)
                    self.save(savepath)
            if self.agent.rollout.stats['tcount'] > self.num_timesteps:
                break
    finally:
        # Release whatever start_interaction() acquired, even on error.
        self.agent.stop_interaction()