Example #1
def test_agent():
    from rtrl import Training, run
    # `partial` and `Agent` come from the enclosing module: rtrl ships its own
    # keyword-merging `partial` for building specs, and `Agent` is the SAC agent
    # defined in that file.
    Sac_Test = partial(
        Training,
        epochs=3,
        rounds=5,
        steps=100,
        Agent=partial(Agent,
                      memory_size=1000000,
                      start_training=256,
                      batchsize=4),
        Env=partial(id="Pendulum-v0", real_time=0),
    )
    run(Sac_Test)
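The specs above lean on rtrl's own `partial`, which, unlike `functools.partial`, may be declared without a callable (as in `Env=partial(id="Pendulum-v0", real_time=0)`) and merged into a default spec later. A minimal sketch of that merging behaviour; the `Spec` class below is illustrative, not rtrl's actual API:

class Spec:
    """Illustrative stand-in for a keyword-merging partial: the callable
    may be omitted and inherited from a default spec when merging."""
    def __init__(self, func=None, **kwargs):
        self.func, self.kwargs = func, kwargs

    def merged_with(self, default):
        # keep our callable if set, else inherit the default's;
        # our keywords override the default's
        return Spec(self.func or default.func, **{**default.kwargs, **self.kwargs})

    def __call__(self, **extra):
        return self.func(**{**self.kwargs, **extra})

# usage: a callable-less Env spec merged into a default (`dict` stands in for an Env class)
default_env = Spec(dict, id="CartPole-v1")
env = Spec(id="Pendulum-v0", real_time=0).merged_with(default_env)()
print(env)  # {'id': 'Pendulum-v0', 'real_time': 0}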
Example #2
def test_agent_avenue():
    from rtrl import Training, run
    from rtrl.envs import AvenueEnv
    # `AvenueAgent` (defined in the enclosing module, see Example #4 below) is
    # `Agent` preconfigured for the Avenue driving simulator.
    Sac_Avenue_Test = partial(
        Training,
        epochs=3,
        rounds=5,
        steps=300,
        Agent=partial(AvenueAgent,
                      device='cpu',
                      training_interval=4,
                      start_training=400),
        Env=partial(AvenueEnv, real_time=0),
        Test=partial(number=0),  # laptop can't handle more than that
    )
    run(Sac_Avenue_Test)
Example #3
        os.mkdir(path)
    save_json(partial_to_dict(run_cls), path + '/spec.json')  # serialize the spec for reproducibility
    if not exists(path + '/stats'):
        dump(pd.DataFrame(), path + '/stats')  # start from an empty stats table
    for stats in iterate_episodes(run_cls, path + '/state'):
        dump(
            load(path + '/stats').append(stats, ignore_index=True),
            path + '/stats')  # concat with stats from previous episodes
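Note that `DataFrame.append` was removed in pandas 2.0, so on current pandas the accumulation step needs `pd.concat` instead. A minimal equivalent, assuming `stats` is a single episode's record (a dict or Series):

import pandas as pd

history = pd.DataFrame()                 # as dumped initially above
stats = {"round": 0, "return": -1234.5}  # illustrative episode statistics
history = pd.concat([history, pd.DataFrame([stats])], ignore_index=True)
print(history)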


# === specifications ===================================================================================================

TestTraining = partial(
    Training,
    epochs=3,
    rounds=5,
    steps=10,
    Agent=partial(memory_size=1000000),  # callable omitted: merged into the default Agent spec
    Env=partial(id="Pendulum-v0"),
)

SacTraining = partial(Training,
                      Agent=partial(rtrl.sac.Agent),
                      Env=partial(id="Pendulum-v0"),
                      Test=partial(number=4))

# RTAC reuses the SAC spec, swapping in the RTAC agent and a real-time env
RtacTraining = partial(
    SacTraining,
    Agent=partial(rtrl.rtac.Agent),
    Env=partial(real_time=True),
)
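Any of these specs can be launched the same way as the tests above:

if __name__ == "__main__":
    from rtrl import run
    run(SacTraining)  # or run(RtacTraining) for the real-time variant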
Example #4
                                   self.outputnorm.parameters(),
                                   self.target_update)

        return dict(  # per-update training statistics, reported for logging
            loss_actor=loss_actor.detach(),
            loss_critic=loss_critic.detach(),
            outputnorm_mean=float(self.outputnorm.mean),
            outputnorm_std=float(self.outputnorm.std),
            memory_size=len(self.memory),
        )
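The `.detach()` calls cut the loss tensors out of the autograd graph; for logging, plain Python floats are often more convenient. The same pattern with explicit conversion (a sketch, assuming PyTorch):

import torch

loss_actor = torch.randn(()) ** 2                   # illustrative scalar loss tensor
stats = {"loss_actor": float(loss_actor.detach())}  # detach, then convert to float
print(stats)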


# `Agent` preconfigured for the Avenue driving simulator
AvenueAgent = partial(Agent,
                      entropy_scale=0.05,
                      lr=0.0002,
                      memory_size=500000,
                      batchsize=100,
                      training_interval=4,
                      start_training=10000,
                      Model=partial(rtrl.sac_models.ConvModel))
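`AvenueAgent` is simply `Agent` with preset hyperparameters; keywords passed later override the preset, which is how Example #2 swaps in `device='cpu'` and a lower `start_training`. Plain `functools.partial` layers the same way (a sketch; `dict` stands in for `Agent`):

from functools import partial

base = partial(dict, lr=0.0002, batchsize=100, start_training=10000)
print(base(start_training=400))  # call-time keywords win: start_training becomes 400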


# === tests ============================================================================================================
def test_agent():
    from rtrl import Training, run
    Sac_Test = partial(
        Training,
        epochs=3,
        rounds=5,
        steps=100,
        Agent=partial(Agent,
                      memory_size=1000000,
                      start_training=256,
                      batchsize=4),
        Env=partial(id="Pendulum-v0", real_time=0),
    )
    run(Sac_Test)
Example #5
File: rtac.py Project: wsg1873/rtrl
        return dict(
            loss_total=loss_total.detach(),
            loss_critic=loss_critic.detach(),
            loss_actor=loss_actor.detach(),
            outputnorm_mean=float(self.outputnorm.mean),
            outputnorm_std=float(self.outputnorm.std),
            memory_size=len(self.memory),
            # entropy_scale=self.entropy_scale
        )


AvenueAgent = partial(Agent,
                      entropy_scale=0.05,
                      lr=0.0002,
                      memory_size=500000,
                      batchsize=100,
                      training_interval=4,
                      start_training=10000,
                      Model=partial(ConvDouble))

if __name__ == "__main__":
    from rtrl import Training, run
    from rtrl import rtac_models
    Rtac_Test = partial(
        Training,
        epochs=3,
        rounds=5,
        steps=500,
        Agent=partial(Agent,
                      device='cpu',
                      memory_size=1000000,
                      start_training=256,
                      batchsize=4),
        Env=partial(id="Pendulum-v0", real_time=True),
    )
    run(Rtac_Test)
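`real_time=True` appears to run the environment in rtrl's real-time mode, where, per the Real-Time Reinforcement Learning setting this project implements, each action takes effect with a one-step delay; RTAC is designed for exactly that regime.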
Example #6
if __name__ == "__main__":
    from rtrl import Training, run
    from rtrl import rtac_models
    # Rtac_Test = partial(
    #   Training,
    #   epochs=3,
    #   rounds=5,
    #   steps=500,
    #   Agent=partial(Agent, device='cpu', memory_size=1000000, start_training=256, batchsize=4),
    #   Env=partial(id="Pendulum-v0", real_time=True),
    #   # Env=partial(id="HalfCheetah-v2", real_time=True),
    # )
    # run(Rtac_Test)