Example #1
def policy_found(q, steps):
    from rl.environment import Environment
    from rl.agent import Agent
    from rl.stateaction import StateAction

    # Util is assumed to be imported by the surrounding module; it supplies the
    # grid dimensions, the MIN_VALUE sentinel, and the goal state.
    environment = Environment()
    agent = Agent(environment, Util.get_state_actions, q, 1, 1)
    # On a grid, a successful greedy policy needs at most rows + cols steps.
    max_steps_allowed = Util.num_cols + Util.num_rows

    steps_to_goal = 0
    while steps_to_goal < max_steps_allowed:
        steps_to_goal += 1
        prev_state = agent.get_state()
        agent.test()                      # advance the agent one step in test (greedy) mode
        action = agent.get_action()
        # Record the transition unless the previous state is the unset sentinel.
        if prev_state != Util.MIN_VALUE:
            steps.append(StateAction(prev_state, action))

        if agent.get_state() == Util.get_goal_state():
            return True
        if agent.terminal:
            return False
    return agent.get_state() == Util.get_goal_state()
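For context on how this helper is used: `policy_found` replays the greedy policy encoded in a learned Q-table and reports whether it reaches the goal, filling `steps` with the visited StateAction pairs along the way. A minimal driver sketch; `train_q_table` is a hypothetical helper, not part of the original snippet:

q = train_q_table()   # hypothetical: tabular Q-learning over the same grid
steps = []            # receives the StateAction trace as the policy is replayed
if policy_found(q, steps):
    print('Goal reached in', len(steps), 'steps')
else:
    print('No policy found after', len(steps), 'recorded transitions')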
Example #2
# The snippet begins mid-file: model, nb_actions, policy, memory, processor,
# and env are assumed to be defined earlier in the script (as in the
# keras-rl dqn_atari.py example this follows).
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.callbacks import FileLogger, ModelIntervalCheckpoint

dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, nb_steps_warmup=50000, gamma=.99, target_model_update=10000,
               train_interval=4, delta_clip=1.)

# Compile with Adam at a learning rate of 0.00025, tracking mean absolute error.
dqn.compile(Adam(lr=.00025), metrics=['mae'])

#=== TRAIN ===#

if args.mode == 'train':
    # Periodically checkpoint the weights and log training metrics to JSON.
    checkpoint_weights_filename = 'weights_{step}.h5f'
    log_filename = 'dqn_log.json'
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]

    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, save the final weights.
    dqn.save_weights('final_weights.h5f', overwrite=True)

#=== TEST ===#

elif args.mode == 'test':
    # Load previously trained weights and run 10 evaluation episodes with rendering.
    dqn.load_weights('trained_data/final_weights.h5f')
    dqn.test(env, nb_episodes=10, visualize=True)
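The `args.mode` branches above assume argument parsing near the top of the script; in the keras-rl dqn_atari.py example this is done with argparse. A minimal sketch, where the exact flag names are an assumption:

# Assumed to appear near the top of the script; exact flags are an assumption.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--mode', choices=['train', 'test'], default='train')
args = parser.parse_args()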