# NOTE(review): the line below is a whitespace-collapsed fragment. It starts
# inside a branch whose header is not visible here (presumably
# `if args.mode == 'train':` -- confirm), and the original line breaks and
# indentation are lost, so statement nesting must be checked against the
# original file.
#
# Steps visible on the line, in order:
#   * First (header-less) branch:
#       - build the checkpoint filename template
#         'dqn_<env_name>_weights_{step}.h5f' (the '{step}' placeholder is left
#         literal here; presumably filled in by the checkpoint callback --
#         confirm) and the log filename 'dqn_<env_name>_log.json';
#       - register ModelIntervalCheckpoint(interval=250000) and
#         FileLogger(interval=100) as callbacks;
#       - call dqn.fit(env, ...) with nb_steps=1750000, log_interval=10000;
#       - call dqn.save_weights(weights_filename, overwrite=True);
#         `weights_filename` is not assigned anywhere in this branch's visible
#         text -- TODO confirm it is set before this fragment;
#       - call dqn.test(env, nb_episodes=10, visualize=False).
#   * args.mode == 'test':
#       - default weights file is 'dqn_<env_name>_weights.h5f', replaced by
#         args.weights when that value is truthy;
#       - dqn.load_weights(weights_filename), then
#         dqn.test(env, nb_episodes=10, visualize=True).
#         (load_weights is presumably outside the `if args.weights:` body --
#         the nesting cannot be read from the collapsed line.)
#   * args.mode == 'explore':
#       - path is expanduser("~") + '/.costar/data/test';
#       - the only callback is BinaryDataLogger(path, interval=-1)
#         (a TrainEpisodeLogger1 alternative is commented out);
#       - dqn.policy is replaced with RandomPolicy() before
#         dqn.fit(env, visualize=False, ..., nb_steps=1750000,
#         log_interval=10000) is called.
checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f' log_filename = 'dqn_{}_log.json'.format(args.env_name) callbacks = [ ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000) ] callbacks += [FileLogger(log_filename, interval=100)] dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000) # After training is done, we save the final weights one more time. dqn.save_weights(weights_filename, overwrite=True) # Finally, evaluate our algorithm for 10 episodes. dqn.test(env, nb_episodes=10, visualize=False) elif args.mode == 'test': weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name) if args.weights: weights_filename = args.weights dqn.load_weights(weights_filename) dqn.test(env, nb_episodes=10, visualize=True) elif args.mode == 'explore': home = expanduser("~") path = home + '/.costar/data/test' callbacks = [BinaryDataLogger(path, interval=-1)] #callbacks = [TrainEpisodeLogger1()] dqn.policy = RandomPolicy() dqn.fit(env, visualize=False, callbacks=callbacks, nb_steps=1750000, log_interval=10000)
# NOTE(review): the line below is a whitespace-collapsed fragment. It opens
# with the tail of a call that begins outside this view
# (`value_test=.05, nb_steps=5000)`), and the original indentation is lost, so
# the extent of the `if model_exist:` body cannot be determined from this line
# alone -- confirm against the original file.
#
# Steps visible on the line, in order:
#   * Two alternative policy assignments (EpsGreedyQPolicy(10),
#     BoltzmannQPolicy()) are commented out.
#   * Construct DQNAgent(model=model, nb_actions=num_actions, memory=memory,
#     nb_steps_warmup=window * 3, policy=policy) and compile it with
#     Adam(lr=1e-3) and the 'mae' metric.
#   * Assign dqn.enable_dueling_network = True *after* construction and
#     compile.
#     NOTE(review): if DQNAgent is keras-rl's, enable_dueling_network is
#     presumably meant to be passed to the constructor, and setting the
#     attribute afterwards likely does not alter the already-built model --
#     verify. Moving it into the constructor would change the network and may
#     make weights saved without it unloadable.
#   * When model_exist is truthy, load weights from model_path. A
#     LinearAnnealedPolicy over EpsGreedyQPolicy (attr='eps', value_max=.5,
#     value_min=.1, value_test=.05, nb_steps=5000) is then assigned to
#     dqn.policy (possibly only inside the `if` -- nesting unknown).
#   * Call env.set_data_interval(train_start, train_end), then
#     dqn.fit(env, nb_steps=5000, visualize=False, verbose=2,
#     action_repetition=5); the result is stored in train_history.
#   * Call env.set_data_interval(train_start, test_end), print 'Whole', and run
#     dqn.test(env, nb_episodes=2); its result overwrites train_history.
#   * Save weights to model_path (overwrite=True), call
#     env.save_action_plot('action_validate.csv'), and draw a vertical line at
#     x = train_end - train_start via plt.axvline.
value_test=.05, nb_steps=5000) #policy = EpsGreedyQPolicy(10) #policy = BoltzmannQPolicy() dqn = DQNAgent(model=model, nb_actions=num_actions, memory=memory, nb_steps_warmup=window * 3, policy=policy) dqn.compile(Adam(lr=1e-3), metrics=['mae']) dqn.enable_dueling_network = True if model_exist: dqn.load_weights(model_path) dqn.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=.5, value_min=.1, value_test=.05, nb_steps=5000) env.set_data_interval(train_start, train_end) train_history = dqn.fit(env, nb_steps=5000, visualize=False, verbose=2, action_repetition=5) env.set_data_interval(train_start, test_end) print('Whole') train_history = dqn.test(env, nb_episodes=2) dqn.save_weights(model_path, overwrite=True) env.save_action_plot('action_validate.csv') plt.axvline(x=train_end - train_start)