Beispiel #1
0
def learner2gif(lnr: DDPGLearner, s_format,
                group_interp: GroupAgentInterpretation, name: str, extra: str):
    """Plot a learner's rewards, pickle the group interpretation, write GIFs.

    Args:
        lnr: Learner to interpret. Its ``memory`` class name and
            ``model.name`` are used to label the generated artifacts.
        s_format: Feed type of the run. Labelled ``FEED_TYPE_STATE`` when it
            equals ``FEED_TYPE_STATE`` and ``FEED_TYPE_IMAGE`` otherwise.
        group_interp: Group accumulator; this run's interpretation is added
            to it and the group is then pickled.
        name: Prefix used for the pickle directory and the GIF path.
        extra: Suffix appended to the GIF path.
    """
    memory_name = lnr.memory.__class__.__name__
    if s_format == FEED_TYPE_STATE:
        feed_label = "FEED_TYPE_STATE"
    else:
        feed_label = "FEED_TYPE_IMAGE"
    meta = f'{memory_name}_{feed_label}'

    # Interpret the training dataset and register the run under its label.
    interp = AgentInterpretation(lnr, ds_type=DatasetType.Train)
    interp.plot_rewards(cumulative=True, per_episode=True, group_name=meta)
    group_interp.add_interpretation(interp)

    model_name = lnr.model.name.lower()
    pickle_dir = f'../docs_src/data/{name}_{model_name}/'
    pickle_name = f'{model_name}_{meta}'
    group_interp.to_pickle(pickle_dir, pickle_name)

    # Every object yielded by generate_gif() is written to the same path.
    gif_path = f'../res/run_gifs/{name}_{extra}'
    for gif in interp.generate_gif():
        gif.write(gif_path)
Beispiel #2
0
def test_ddpg_models_mountain_car_continuous(model_cls, s_format, experience):
    """Train DDPG on MountainCarContinuous-v0 five times and record each run.

    For every run a fresh data bunch, exploration method, memory, model and
    learner are built, the learner is fit for 450 epochs, and the resulting
    interpretation is added to a shared group that is re-pickled after each
    run. GIFs produced by the interpretation are written afterwards.

    Args:
        model_cls: Passed to ``create_ddpg_model`` as ``base_arch``.
        s_format: Feed type handed to the data bunch; also used to label the
            run as ``FEED_TYPE_STATE`` or ``FEED_TYPE_IMAGE``.
        experience: Callable building the memory (invoked with
            ``memory_size`` and ``reduce_ram``); its ``__name__`` labels the
            run.
    """
    group_interp = GroupAgentInterpretation()
    for _ in range(5):
        print('\n')
        data = MDPDataBunch.from_env(
            'MountainCarContinuous-v0',
            render='rgb_array',
            bs=40,
            add_valid=False,
            keep_env_open=False,
            feed_type=s_format,
            memory_management_strategy='k_partitions_top',
            k=3,
            res_wrap=partial(ResolutionWrapper, w_step=2, h_step=2))
        # Guarantee the data bunch is closed even when training, plotting,
        # pickling or GIF writing raises; previously a failure skipped
        # data.close() entirely.
        try:
            exploration_method = OrnsteinUhlenbeck(
                size=data.action.taken_action.shape,
                epsilon_start=1,
                epsilon_end=0.1,
                decay=0.0001)
            memory = experience(memory_size=1000000, reduce_ram=True)
            model = create_ddpg_model(data=data, base_arch=model_cls)
            learner = ddpg_learner(data=data,
                                   model=model,
                                   memory=memory,
                                   exploration_method=exploration_method,
                                   callback_fns=[RewardMetric, EpsilonMetric])
            learner.fit(450)

            meta = f'{experience.__name__}_{"FEED_TYPE_STATE" if s_format==FEED_TYPE_STATE else "FEED_TYPE_IMAGE"}'
            interp = AgentInterpretation(learner, ds_type=DatasetType.Train)
            interp.plot_rewards(cumulative=True, per_episode=True,
                                group_name=meta)
            group_interp.add_interpretation(interp)
            group_interp.to_pickle(
                f'../docs_src/data/mountaincarcontinuous_{model.name.lower()}/',
                f'{model.name.lower()}_{meta}')
            # Plain loop: the writes are side effects, no list is needed.
            for g in interp.generate_gif():
                g.write('../res/run_gifs/mountaincarcontinuous')
        finally:
            data.close()
        # Drop references before the next run builds new objects.
        del learner
        del model
        del data
def test_dqn_models_mountaincar(model_cls, s_format, experience):
    """Train a DQN-style learner on MountainCar-v0 five times and record runs.

    Each run obtains a learner from ``trained_learner``, adds its training
    interpretation to a shared group, re-pickles the group, and writes the
    GIFs produced by the interpretation.

    Args:
        model_cls: Forwarded to ``trained_learner`` as its first argument.
        s_format: Feed type forwarded to ``trained_learner``; also used to
            label the run as ``FEED_TYPE_STATE`` or ``FEED_TYPE_IMAGE``.
        experience: Forwarded to ``trained_learner``; its ``__name__`` labels
            the run.
    """
    group_interp = GroupAgentInterpretation()
    for _ in range(5):
        learn = trained_learner(
            model_cls,
            'MountainCar-v0',
            s_format,
            experience,
            bs=32,
            layers=[24, 12],
            memory_size=1000000,
            decay=0.00001,
            copy_over_frequency=1000,
        )

        experience_name = experience.__name__
        if s_format == FEED_TYPE_STATE:
            feed_label = "FEED_TYPE_STATE"
        else:
            feed_label = "FEED_TYPE_IMAGE"
        meta = f'{experience_name}_{feed_label}'

        interp = AgentInterpretation(learn, ds_type=DatasetType.Train)
        interp.plot_rewards(cumulative=True, per_episode=True, group_name=meta)
        group_interp.add_interpretation(interp)

        model_name = learn.model.name.lower()
        filename = f'{model_name}_{meta}'
        target_dir = f'../docs_src/data/mountaincar_{model_name}/'
        group_interp.to_pickle(target_dir, filename)

        # Every object yielded by generate_gif() is written to the same path.
        for gif in interp.generate_gif():
            gif.write('../res/run_gifs/mountaincar')

        # Release the learner before the next run creates a new one.
        del learn