import json
import sys
import time

import gym
import matplotlib.pyplot as plt
import numpy as np
from keras.layers import Convolution2D, Dense, Flatten
from keras.models import Sequential, model_from_json
from keras.optimizers import Adam

# Project-local helpers (DQNAgent, Eq9, huber_loss, the preprocessors,
# policies, and replay memory used below) are imported from this repository.


def main():
    if len(sys.argv) != 5:
        print("usage:{} <env> <model_json> <weights> <directory>".format(
            sys.argv[0]))
        sys.exit(1)
    env = gym.make(sys.argv[1])
    env.frameskip = 1  # step one frame at a time (no built-in frameskip)

    # load the model architecture and weights saved during training
    with open(sys.argv[2]) as json_file:
        model = model_from_json(json.load(json_file), {"Eq9": Eq9})
    model.load_weights(sys.argv[3])

    epsilon = 0.01
    input_shape = (84, 84)
    history_size = 4
    eval_size = 1
    directory = sys.argv[4]

    history_prep = HistoryPreprocessor(history_size)
    atari_prep = AtariPreprocessor(input_shape, 0, 999)
    numpy_prep = NumpyPreprocessor()
    preprocessors = PreprocessorSequence(
        [atari_prep, history_prep, numpy_prep])  # from left to right

    policy = GreedyEpsilonPolicy(epsilon)
    agent = DQNAgent(model, preprocessors, None, policy, 0.99, None, None,
                     None, None)
    # record the evaluation episode (videos and stats) to <directory>
    env = gym.wrappers.Monitor(env, directory, force=True)
    reward_arr, length_arr = agent.evaluate_detailed(
        env, eval_size, render=False, verbose=True)
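# For reference: a minimal sketch of the standard DQN frame preprocessing that
# AtariPreprocessor and HistoryPreprocessor presumably implement (grayscale,
# resize to 84x84, stack the last `history_size` frames). The names below are
# illustrative assumptions, not this project's actual API.
import cv2  # assumed available; any image-resize routine would do


def preprocess_frame_sketch(rgb_frame, size=(84, 84)):
    # convert to grayscale and downsample to the network's input resolution
    gray = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2GRAY)
    return cv2.resize(gray, size, interpolation=cv2.INTER_AREA)


def stack_history_sketch(frames, history_size=4):
    # stack the most recent frames along the channel axis -> (84, 84, 4)
    return np.stack(frames[-history_size:], axis=-1)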
def main():
    if len(sys.argv) != 6:
        print("usage:{} <env> <model_json> <weights> <render> <random>".format(
            sys.argv[0]))
        sys.exit(1)
    env = gym.make(sys.argv[1])
    env.frameskip = 1

    with open(sys.argv[2]) as json_file:
        model = model_from_json(json.load(json_file), {"Eq9": Eq9})
    model.load_weights(sys.argv[3])

    epsilon = 0.01
    input_shape = (84, 84)
    history_size = 4
    eval_size = 100
    render = (sys.argv[4] == "y")

    history_prep = HistoryPreprocessor(history_size)
    atari_prep = AtariPreprocessor(input_shape, 0, 999)
    numpy_prep = NumpyPreprocessor()
    preprocessors = PreprocessorSequence(
        [atari_prep, history_prep, numpy_prep])  # from left to right

    if sys.argv[5] == "y":
        print("using random policy")
        policy = UniformRandomPolicy(env.action_space.n)
    else:
        print("using greedy policy")
        policy = GreedyEpsilonPolicy(epsilon)

    agent = DQNAgent(model, preprocessors, None, policy, 0.99, None, None,
                     None, None)
    agent.add_keras_custom_layers({"Eq9": Eq9})
    reward_arr, length_arr = agent.evaluate_detailed(
        env, eval_size, render=render, verbose=True)
    print("\rPlayed {} games, reward:M={}, SD={} length:M={}, SD={}".format(
        eval_size, np.mean(reward_arr), np.std(reward_arr),
        np.mean(length_arr), np.std(length_arr)))
    print("max:{} min:{}".format(np.max(reward_arr), np.min(reward_arr)))
    plt.hist(reward_arr)
    plt.show()
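# For reference: minimal sketches of the two policies chosen above, assuming
# they follow the usual select_action(q_values) -> action-index interface.
# These are illustrative stand-ins, not the project's actual implementations.
def epsilon_greedy_sketch(q_values, epsilon=0.01):
    # with probability epsilon explore uniformly, otherwise exploit the argmax
    if np.random.rand() < epsilon:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))


def uniform_random_sketch(num_actions):
    # ignore the Q-values entirely; pick any action with equal probability
    return np.random.randint(num_actions)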
def testPerformance(self):
    """Measure the average wall-clock time of a single evaluation episode
    for a compiled DQN agent on SpaceInvaders."""
    # create a model of the world
    env = gym.make("SpaceInvaders-v0")
    env.frameskip = 1

    # build the Q-network used for timing
    input_shape = (84, 84)
    window = 4
    num_actions = env.action_space.n
    model = Sequential(name="test_model")
    model.add(
        Convolution2D(filters=16,
                      kernel_size=8,
                      strides=4,
                      activation='relu',
                      input_shape=(input_shape[0], input_shape[1], window)))
    model.add(
        Convolution2D(filters=32, kernel_size=4, strides=2,
                      activation='relu'))
    model.add(
        Convolution2D(filters=64, kernel_size=3, strides=1,
                      activation='relu'))
    model.add(Flatten())
    model.add(Dense(units=512, activation='relu'))
    model.add(Dense(units=num_actions, activation='linear'))

    # loss function & optimizer
    optimizer = Adam(lr=0.001,
                     beta_1=0.9,
                     beta_2=0.999,
                     epsilon=1e-08,
                     decay=0.0)
    loss_func = huber_loss

    # preprocessors
    history_prep = HistoryPreprocessor(4)
    atari_prep = AtariPreprocessor(input_shape, 0, 999)
    numpy_prep = NumpyPreprocessor()
    preprocessors = PreprocessorSequence(
        [atari_prep, history_prep, numpy_prep])  # from left to right

    memory = ActionReplayMemory(100000, 4)
    #policy = LinearDecayGreedyEpsilonPolicy(1, 0.1, 100000)
    policy = SamePolicy(1)

    #agent = DQNAgent(model, preprocessors, memory, policy, 0.99, target_update_freq, None, train_freq, batch_size)
    dqn_agent = DQNAgent(model, preprocessors, memory, policy, 0.99, 10000,
                         None, 4, 32)
    dqn_agent.compile(optimizer, loss_func)

    # time 50 single-episode evaluations and report the mean
    total_time = 0
    times = 50
    for i in range(0, times):
        start_time = time.time()
        dqn_agent.evaluate_detailed(env, 1)
        total_time += (time.time() - start_time)
        sys.stdout.write('\r{}'.format(i))
        sys.stdout.flush()
    print("average evaluation time:{} total time:{}".format(
        total_time / times, total_time))
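# For reference: a minimal sketch of the Huber loss used in the test above,
# written against keras.backend; the project's own huber_loss may differ in
# details such as the delta threshold.
import keras.backend as K


def huber_loss_sketch(y_true, y_pred, delta=1.0):
    # quadratic for small errors, linear for large ones (robust to outliers)
    err = y_true - y_pred
    quadratic = 0.5 * K.square(err)
    linear = delta * (K.abs(err) - 0.5 * delta)
    return K.mean(K.switch(K.abs(err) <= delta, quadratic, linear), axis=-1)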