def dora_run_softmax(run_name='default', plot=False):
    """Train a DORA agent on CartPoleVision using softmax action selection.

    Args:
        run_name: label for this run (used by the trainer for logging).
        plot: whether the trainer should plot training progress.
    """
    environment = CartPoleVision()
    policy = LLL_softmax()
    q_network = selectNet()
    exploration_network = selectNet(Enet=True)

    trainer = DoraTrainer(q_network, exploration_network, environment,
                          policy, lr=0.01, run_name=run_name, plot=plot)
    trainer.run()
def optimistic_dqn_run_softmax(run_name='default', plot=False):
    """Train an optimistic-Q DQN on CartPoleVision with softmax selection.

    Args:
        run_name: label for this run (used by the trainer for logging).
        plot: whether the trainer should plot training progress.
    """
    policy = softmax()
    environment = CartPoleVision()
    # "optimisticQ" mode selects the optimistically-initialized Q network.
    network = selectNet(Mode="optimisticQ")

    Trainer(network, environment, policy,
            run_name=run_name, plot=plot).run()
def dqn_run(run_name='default', plot=False):
    """Train a vanilla DQN on CartPoleVision with epsilon-greedy selection.

    Args:
        run_name: label for this run (used by the trainer for logging).
        plot: whether the trainer should plot training progress.
    """
    environment = CartPoleVision()
    policy = epsilon_greedy()
    network = selectNet()

    trainer = Trainer(network, environment, policy,
                      run_name=run_name, plot=plot)
    trainer.run()
def dqn_run_softmax(run_name='default', plot=False):
    """Train a vanilla DQN on CartPoleVision with softmax action selection.

    Args:
        run_name: label for this run (used by the trainer for logging).
        plot: whether the trainer should plot training progress.
    """
    environment = CartPoleVision()
    policy = softmax()
    network = selectNet()

    trainer = Trainer(network, environment, policy,
                      run_name=run_name, plot=plot)
    trainer.run()
# Example #5
def dora_run(env,
             run_name='default',
             plot=False,
             selection='softmax',
             setting=None,
             log_path='logs'):
    """Train a DORA agent on *env*.

    Args:
        env: environment instance; must expose a ``name`` attribute used to
            choose the network architecture (presumably an env like
            CartPoleVision — confirm against callers).
        run_name: label for this run (used by the trainer for logging).
        plot: whether the trainer should plot training progress.
        selection: action-selection strategy name, ``'softmax'`` or
            ``'epsilon'``.
        setting: training hyper-parameter object; a fresh ``DefaultSetting()``
            is created per call when omitted.
        log_path: directory where the trainer writes its logs.

    Raises:
        ValueError: if *selection* is not a recognized strategy name.
    """
    # Fix for the mutable-default pitfall: the previous default of
    # `setting=DefaultSetting()` was built once at import time and the same
    # object was shared (and potentially mutated) across every call.
    if setting is None:
        setting = DefaultSetting()

    # Map names to strategy *classes* so only the chosen one is instantiated
    # (the old code eagerly constructed both strategies on every call).
    strategies = {
        'epsilon': LLL_epsilon_greedy,
        'softmax': LLL_softmax,
    }
    try:
        selection = strategies[selection]()
    except KeyError:
        raise ValueError(
            "Unknown selection strategy: {!r}; expected one of {}".format(
                selection, sorted(strategies))) from None

    Qnet = selectNet('dqn', env.name)
    Enet = selectNet('enet', env.name)

    t = DoraTrainer(Qnet,
                    Enet,
                    env,
                    selection,
                    run_name=run_name,
                    plot=plot,
                    setting=setting,
                    log_path=log_path)
    t.run()