예제 #1
0
    def train(self, args):
        """Run the training loop, optionally resuming from a saved checkpoint.

        Starts interaction on ``self.envs``, optionally restores variables from
        a checkpoint, then calls ``self.agent.step()`` until
        ``self.agent.rollout.stats['tcount']`` exceeds ``self.num_timesteps``.
        Update statistics are logged whenever a step reports them, and the
        variables are saved every 10 updates under
        ``<logger dir>/checkpoints/<n_updates zero-padded to 5 digits>``.

        Args:
            args: Mapping that must provide ``'load_weights'`` (a checkpoint
                file name inside the checkpoints directory, or ``None`` to
                start fresh), ``'nsteps_per_seg'`` and ``'envs_per_process'``.
                The checkpoint name is parsed with ``int()`` and used as the
                number of updates already performed.

        Raises:
            ValueError: If ``args['load_weights']`` is not ``None`` and cannot
                be parsed as an integer.
        """
        self.agent.start_interaction(self.envs, nlump=self.hps['nlumps'],
                                     dynamics=self.dynamics)
        # Everything after a successful start is guarded so that the matching
        # stop_interaction() call runs even if loading, stepping, logging or
        # saving raises (presumably this is what releases the environments).
        try:
            sess = tf.get_default_session()
            self.save = functools.partial(save_variables, sess=sess)
            self.load = functools.partial(load_variables, sess=sess)

            checkdir = osp.join(logger.get_dir(), 'checkpoints')
            os.makedirs(checkdir, exist_ok=True)

            load_weights = args['load_weights']
            start_nupdates = 0
            if load_weights is not None:
                load_path = osp.join(checkdir, load_weights)
                # Checkpoints are named after the update count they were
                # written at, so the file name doubles as the resume counter.
                start_nupdates = int(load_weights)
                print('Loading checkpoint from %s ' % load_weights)
                self.load(load_path)

            # Timesteps attributed to the updates performed before resuming;
            # constant for the whole run, so compute it once.
            tcount_offset = (start_nupdates
                             * args['nsteps_per_seg']
                             * args['envs_per_process'])

            while True:
                info = self.agent.step()
                update = info['update']
                if update:
                    # Shift the reported counters so logs and checkpoint names
                    # continue from where the loaded checkpoint left off.
                    update['n_updates'] += start_nupdates
                    update['tcount'] += tcount_offset
                    logger.logkvs(update)
                    logger.dumpkvs()

                    if update['n_updates'] % 10 == 0:
                        savepath = osp.join(checkdir,
                                            '%.5i' % update['n_updates'])
                        print('Saving to', savepath)
                        self.save(savepath)

                # NOTE(review): this compares the rollout's own counter, which
                # is not shifted by tcount_offset here, so a resumed run
                # appears to train for a further num_timesteps - confirm that
                # is intended.
                if self.agent.rollout.stats['tcount'] > self.num_timesteps:
                    break
        finally:
            self.agent.stop_interaction()
예제 #2
0
    def train(self):
        """Step the agent until the timestep budget is exceeded.

        Starts interaction on ``self.envs`` and then repeatedly calls
        ``self.agent.step()``. Whenever a step returns a non-empty
        ``info['update']``, its key/value pairs are logged and flushed through
        ``logger``. The loop ends once ``self.agent.rollout.stats['tcount']``
        is greater than ``self.num_timesteps``.

        ``self.agent.stop_interaction()`` is always called once interaction
        has started, including when stepping or logging raises.
        """
        self.agent.start_interaction(self.envs,
                                     nlump=self.hps['nlumps'],
                                     dynamics=self.dynamics)
        # Guard the loop so the matching stop call is not skipped on errors
        # (presumably stop_interaction() is what releases the environments).
        try:
            while True:
                info = self.agent.step()
                update = info['update']
                if update:
                    logger.logkvs(update)
                    logger.dumpkvs()
                if self.agent.rollout.stats['tcount'] > self.num_timesteps:
                    break
        finally:
            self.agent.stop_interaction()
예제 #3
0
    def train(self):
        """Step the agent until the timestep budget is exceeded, with checkpoints.

        Starts interaction on ``self.envs``. If ``self.hps['ckptpath']`` is not
        ``None``, the agent's model is restored from it first. The method then
        repeatedly calls ``self.agent.step()``; each non-empty
        ``info['update']`` is logged through ``logger``, and the model is saved
        to the logger directory whenever ``n_updates`` is a multiple of 60.
        The loop ends once ``self.agent.rollout.stats['tcount']`` is greater
        than ``self.num_timesteps``.

        ``self.agent.stop_interaction()`` is always called once interaction
        has started, including when restoring, stepping, logging or saving
        raises.
        """
        self.agent.start_interaction(self.envs,
                                     nlump=self.hps['nlumps'],
                                     dynamics=self.dynamics)
        # Guard everything after a successful start so the matching stop call
        # is not skipped on errors (presumably it releases the environments).
        try:
            if self.hps['ckptpath'] is not None:
                self.agent.restore_model(logdir=self.hps['ckptpath'],
                                         exp_name=self.hps['exp_name'])
            while True:
                info = self.agent.step()
                update = info['update']
                if update:
                    logger.logkvs(update)
                    logger.dumpkvs()
                    # Periodic checkpoint, tagged with the update count.
                    if update['n_updates'] % 60 == 0:
                        self.agent.save_model(
                            logdir=logger.get_dir(),
                            exp_name=self.hps['exp_name'],
                            global_step=update['n_updates'])
                if self.agent.rollout.stats['tcount'] > self.num_timesteps:
                    break
        finally:
            self.agent.stop_interaction()
예제 #4
0
    def train(self):
        """Step the agent until the timestep budget is exceeded, saving variables.

        Starts interaction on ``self.envs`` and binds ``self.save`` to
        ``save_variables`` with the default TensorFlow session. The method
        then repeatedly calls ``self.agent.step()``; each non-empty
        ``info['update']`` is logged through ``logger``, and every 10 updates
        the variables are saved to
        ``<logger dir>/checkpoints/<n_updates zero-padded to 5 digits>``.
        The loop ends once ``self.agent.rollout.stats['tcount']`` is greater
        than ``self.num_timesteps``.

        The checkpoints directory is created once before the loop starts, so
        it exists even if the run ends before the first save.
        ``self.agent.stop_interaction()`` is always called once interaction
        has started, including when stepping, logging or saving raises.
        """
        self.agent.start_interaction(self.envs,
                                     nlump=self.hps['nlumps'],
                                     dynamics=self.dynamics)
        # Guard everything after a successful start so the matching stop call
        # is not skipped on errors (presumably it releases the environments).
        try:
            sess = tf.get_default_session()
            self.save = functools.partial(save_variables, sess=sess)

            # The target directory does not change between saves, so resolve
            # and create it once instead of on every checkpoint.
            checkdir = osp.join(logger.get_dir(), 'checkpoints')
            os.makedirs(checkdir, exist_ok=True)

            while True:
                info = self.agent.step()
                update = info['update']
                if update:
                    logger.logkvs(update)
                    logger.dumpkvs()

                    if update['n_updates'] % 10 == 0:
                        savepath = osp.join(checkdir,
                                            '%.5i' % update['n_updates'])
                        print('Saving to', savepath)
                        self.save(savepath)

                if self.agent.rollout.stats['tcount'] > self.num_timesteps:
                    break
        finally:
            self.agent.stop_interaction()