Exemple #1
0
def main():
    """Run a saved model for one evaluation episode while recording it.

    Command line: ``<env> <model_json> <weights> <directory>``

    * ``env``        -- id passed to ``gym.make``.
    * ``model_json`` -- JSON file holding the model description.
    * ``weights``    -- weights file loaded into the rebuilt model.
    * ``directory``  -- output directory handed to ``gym.wrappers.Monitor``
      (``force=True`` is passed to the wrapper).

    Exits with status 1 after printing the usage line when the argument
    count is wrong.
    """
    if len(sys.argv) != 5:
        print("usage:{} <env> <model_json> <weights> <directory>".format(sys.argv[0]))
        # A bad invocation is an error; a non-zero status lets shells and
        # calling scripts notice it.
        sys.exit(1)

    env = gym.make(sys.argv[1])
    env.frameskip = 1
    with open(sys.argv[2]) as json_file:
        # "Eq9" is a custom object the stored model refers to by name.
        model = model_from_json(json.load(json_file), {"Eq9": Eq9})
    model.load_weights(sys.argv[3])

    epsilon = 0.01
    input_shape = (84, 84)
    history_size = 4
    eval_size = 1
    directory = sys.argv[4]

    history_prep = HistoryPreprocessor(history_size)
    # NOTE(review): meaning of the trailing 0 / 999 arguments is not visible
    # here -- confirm against AtariPreprocessor.
    atari_prep = AtariPreprocessor(input_shape, 0, 999)
    numpy_prep = NumpyPreprocessor()
    preprocessors = PreprocessorSequence([atari_prep, history_prep, numpy_prep])  # from left to right

    policy = GreedyEpsilonPolicy(epsilon)

    # Evaluation only: memory and the training-related arguments are None.
    agent = DQNAgent(model, preprocessors, None, policy, 0.99, None, None, None, None)
    env = gym.wrappers.Monitor(env, directory, force=True)
    try:
        agent.evaluate_detailed(env, eval_size, render=False, verbose=True)
    finally:
        # Close the monitored env even if evaluation raises, so the wrapper
        # gets a chance to finalize what it wrote to `directory`.
        env.close()
Exemple #2
0
def main():
    """Evaluate a saved model over 100 episodes and report the statistics.

    Command line: ``<env> <model_json> <weights> <render> <random>``

    * ``env``        -- id passed to ``gym.make``.
    * ``model_json`` -- JSON file holding the model description.
    * ``weights``    -- weights file loaded into the rebuilt model.
    * ``render``     -- ``y`` to render while evaluating; anything else
      disables rendering.
    * ``random``     -- ``y`` to act with ``UniformRandomPolicy``; anything
      else uses ``GreedyEpsilonPolicy`` with epsilon 0.01.

    Prints mean/SD of episode reward and episode length plus the max/min
    reward, then shows a histogram of the rewards.

    Exits with status 1 after printing the usage line when the argument
    count is wrong.
    """
    if len(sys.argv) != 6:
        print("usage:{} <env> <model_json> <weights> <render> <random>".format(
            sys.argv[0]))
        # A bad invocation is an error; a non-zero status lets shells and
        # calling scripts notice it.
        sys.exit(1)

    env = gym.make(sys.argv[1])
    env.frameskip = 1
    with open(sys.argv[2]) as json_file:
        # "Eq9" is a custom object the stored model refers to by name.
        model = model_from_json(json.load(json_file), {"Eq9": Eq9})
    model.load_weights(sys.argv[3])

    epsilon = 0.01
    input_shape = (84, 84)
    history_size = 4
    eval_size = 100
    render = (sys.argv[4] == "y")

    history_prep = HistoryPreprocessor(history_size)
    # NOTE(review): meaning of the trailing 0 / 999 arguments is not visible
    # here -- confirm against AtariPreprocessor.
    atari_prep = AtariPreprocessor(input_shape, 0, 999)
    numpy_prep = NumpyPreprocessor()
    preprocessors = PreprocessorSequence(
        [atari_prep, history_prep, numpy_prep])  # from left to right

    if sys.argv[5] == "y":
        print("using random policy")
        policy = UniformRandomPolicy(env.action_space.n)
    else:
        print("using greedy policy")
        policy = GreedyEpsilonPolicy(epsilon)

    # Evaluation only: memory and the training-related arguments are None.
    agent = DQNAgent(model, preprocessors, None, policy, 0.99, None, None,
                     None, None)
    agent.add_keras_custom_layers({"Eq9": Eq9})
    reward_arr, length_arr = agent.evaluate_detailed(env,
                                                     eval_size,
                                                     render=render,
                                                     verbose=True)
    # The last field is the SD of the episode lengths; it previously repeated
    # the reward SD by mistake.
    print("\rPlayed {} games, reward:M={}, SD={} length:M={}, SD={}".format(
        eval_size, np.mean(reward_arr), np.std(reward_arr),
        np.mean(length_arr), np.std(length_arr)))
    print("max:{} min:{}".format(np.max(reward_arr), np.min(reward_arr)))

    plt.hist(reward_arr)
    plt.show()
Exemple #3
0
    def testPerformance(self):
        """
        Time repeated single-episode evaluations of a compiled DQNAgent.

        Builds a small convolutional Keras network for SpaceInvaders-v0,
        wraps it in a DQNAgent, compiles it, then calls
        evaluate_detailed(env, 1) 50 times and prints the average and total
        wall-clock time. The method makes no assertions.
        """
        # environment the agent is evaluated on
        env = gym.make("SpaceInvaders-v0")
        env.frameskip = 1

        # network dimensions
        frame_shape = (84, 84)
        window = 4
        n_actions = env.action_space.n

        # throwaway keras network: three conv layers followed by two dense ones
        net = Sequential(name="test_model")
        net.add(
            Convolution2D(filters=16,
                          kernel_size=8,
                          strides=4,
                          activation='relu',
                          input_shape=(frame_shape[0], frame_shape[1],
                                       window)))
        for n_filters, kernel, stride in ((32, 4, 2), (64, 3, 1)):
            net.add(
                Convolution2D(filters=n_filters,
                              kernel_size=kernel,
                              strides=stride,
                              activation='relu'))
        net.add(Flatten())
        net.add(Dense(units=512, activation='relu'))
        net.add(Dense(units=n_actions, activation='linear'))

        # optimizer and loss handed to compile()
        adam = Adam(lr=0.001,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=0.0)
        loss = huber_loss

        # preprocessing pipeline, applied from left to right
        pipeline = PreprocessorSequence([
            AtariPreprocessor(frame_shape, 0, 999),
            HistoryPreprocessor(4),
            NumpyPreprocessor(),
        ])
        replay = ActionReplayMemory(100000, 4)
        fixed_policy = SamePolicy(1)

        # positional arguments follow:
        # DQNAgent(model, preprocessors, memory, policy, 0.99,
        #          target_update_freq, None, train_freq, batch_size)
        agent = DQNAgent(net, pipeline, replay, fixed_policy, 0.99, 10000,
                         None, 4, 32)
        agent.compile(adam, loss)

        # time each evaluation run separately and show a progress counter
        times = 50
        durations = []
        for run in range(times):
            began = time.time()
            agent.evaluate_detailed(env, 1)
            durations.append(time.time() - began)
            sys.stdout.write('\r{}'.format(run))
            sys.stdout.flush()

        total_time = sum(durations)
        print("average evaluation time:{} total time:{}".format(
            total_time / times, total_time))