def test_agent():
  from rtrl import Training, run
  Sac_Test = partial(
    Training,
    epochs=3,
    rounds=5,
    steps=100,
    Agent=partial(Agent, memory_size=1000000, start_training=256, batchsize=4),
    Env=partial(id="Pendulum-v0", real_time=0),
  )
  run(Sac_Test)
def test_agent_avenue():
  from rtrl import Training, run
  from rtrl.envs import AvenueEnv
  Sac_Avenue_Test = partial(
    Training,
    epochs=3,
    rounds=5,
    steps=300,
    Agent=partial(AvenueAgent, device='cpu', training_interval=4, start_training=400),
    Env=partial(AvenueEnv, real_time=0),
    Test=partial(number=0),  # laptop can't handle more than that
  )
  run(Sac_Avenue_Test)
  os.mkdir(path)
  save_json(partial_to_dict(run_cls), path + '/spec.json')  # record the full spec for reproducibility
  if not exists(path + '/stats'):
    dump(pd.DataFrame(), path + '/stats')

  for stats in iterate_episodes(run_cls, path + '/state'):
    dump(load(path + '/stats').append(stats, ignore_index=True), path + '/stats')  # concat with stats from previous episodes


# === specifications ===================================================================================================

TestTraining = partial(
  Training,
  epochs=3,
  rounds=5,
  steps=10,
  Agent=partial(memory_size=1000000),
  Env=partial(id="Pendulum-v0"),
)

SacTraining = partial(
  Training,
  Agent=partial(rtrl.sac.Agent),
  Env=partial(id="Pendulum-v0"),
  Test=partial(number=4),
)

RtacTraining = partial(
  SacTraining,
  Agent=partial(rtrl.rtac.Agent),
  Env=partial(real_time=True),
)
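# Usage sketch (illustrative, not part of the original file): since the
# specifications above are plain `partial`s over `Training`, they can be
# specialized further before being handed to the `run` helper used in the
# tests above. `QuickSac` is a hypothetical name introduced here.
def example_specialized_spec():
  from rtrl import run
  QuickSac = partial(SacTraining, epochs=1, rounds=1, steps=10)  # override a few fields of an existing spec
  run(QuickSac)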
                  self.outputnorm.parameters(), self.target_update)

    # per-step diagnostics returned to the training loop
    return dict(
      loss_actor=loss_actor.detach(),
      loss_critic=loss_critic.detach(),
      outputnorm_mean=float(self.outputnorm.mean),
      outputnorm_std=float(self.outputnorm.std),
      memory_size=len(self.memory),
    )


AvenueAgent = partial(
  Agent,
  entropy_scale=0.05,
  lr=0.0002,
  memory_size=500000,
  batchsize=100,
  training_interval=4,
  start_training=10000,
  Model=partial(rtrl.sac_models.ConvModel),
)
    # per-step diagnostics returned to the training loop
    return dict(
      loss_total=loss_total.detach(),
      loss_critic=loss_critic.detach(),
      loss_actor=loss_actor.detach(),
      outputnorm_mean=float(self.outputnorm.mean),
      outputnorm_std=float(self.outputnorm.std),
      memory_size=len(self.memory),
      # entropy_scale=self.entropy_scale
    )


AvenueAgent = partial(
  Agent,
  entropy_scale=0.05,
  lr=0.0002,
  memory_size=500000,
  batchsize=100,
  training_interval=4,
  start_training=10000,
  Model=partial(ConvDouble),
)


if __name__ == "__main__":
  from rtrl import Training, run
  from rtrl import rtac_models
  Rtac_Test = partial(
    Training,
    epochs=3,
    rounds=5,
    steps=500,
    Agent=partial(Agent, device='cpu', memory_size=1000000, start_training=256, batchsize=4),
    Env=partial(id="Pendulum-v0", real_time=True),
    # Env=partial(id="HalfCheetah-v2", real_time=True),
  )
  run(Rtac_Test)