def policy_found(q, steps):
    from rl.environment import Environment
    from rl.agent import Agent
    from rl.stateaction import StateAction

    environment = Environment()
    agent = Agent(environment, Util.get_state_actions, q, 1, 1)

    # Follow the greedy policy for at most (rows + cols) steps and record the
    # visited state/action pairs in `steps`.
    maxStepsAllowed = Util.num_cols + Util.num_rows
    stepsToGoal = 0
    while stepsToGoal < maxStepsAllowed:
        stepsToGoal += 1
        prevState = agent.get_state()
        agent.test()
        action = agent.get_action()
        if prevState != Util.MIN_VALUE:
            steps.append(StateAction(prevState, action))
        if agent.get_state() == Util.get_goal_state():
            return True
        if agent.terminal:
            return False
    return agent.get_state() == Util.get_goal_state()
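For context, a minimal usage sketch of policy_found after a round of Q-learning, checking whether the greedy policy reaches the goal within the step limit. The StateAction attribute names (state, action) and the print statements are illustrative assumptions, not taken from the original code.

# Usage sketch: assumes `q` is the Q-table produced by the training loop and
# that StateAction exposes `state` and `action` attributes (an assumption).
steps = []
if policy_found(q, steps):
    print("Greedy policy reached the goal in {} steps".format(len(steps)))
    for sa in steps:
        print(sa.state, sa.action)
else:
    print("Policy not yet converged: goal not reached within the step limit")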
               processor=processor, nb_steps_warmup=50000, gamma=.99,
               target_model_update=10000, train_interval=4, delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])  # learning rate

#=== TRAIN ===#
if args.mode == 'train':
    checkpoint_weights_filename = 'weights_{step}.h5f'
    log_filename = 'dqn_log.json'
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)
    # After training is done, save the final weights.
    dqn.save_weights('final_weights.h5f', overwrite=True)

#=== TEST ===#
elif args.mode == 'test':
    dqn.load_weights('trained_data/final_weights.h5f')
    dqn.test(env, nb_episodes=10, visualize=True)
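The `processor` passed to the agent above is defined outside this excerpt. As a point of reference, keras-rl's Atari DQN example uses a Processor subclass along the following lines; treat this as an assumed stand-in for the preprocessing, not the exact class used here.

from PIL import Image
import numpy as np
from rl.core import Processor

INPUT_SHAPE = (84, 84)  # assumed frame size, as in keras-rl's dqn_atari example

class AtariProcessor(Processor):
    def process_observation(self, observation):
        # Resize the raw RGB frame and convert it to grayscale.
        img = Image.fromarray(observation)
        img = img.resize(INPUT_SHAPE).convert('L')
        return np.array(img).astype('uint8')

    def process_state_batch(self, batch):
        # Frames are stored as uint8 to save memory; rescale to [0, 1] at train time.
        return batch.astype('float32') / 255.

    def process_reward(self, reward):
        # Clip rewards to [-1, 1] for training stability.
        return np.clip(reward, -1., 1.)

processor = AtariProcessor()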