예제 #1
0
def load_data_make_train_play():
    """Train a new model on saved play data, then let it play on screen.

    Loads the saved frames, moves and rewards, discounts the rewards with a
    factor of 0.95, trains a freshly built model on the result and finally
    has the trained model play 10 single-player games with moves shown and
    nothing saved.
    """
    frames, actions, raw_rewards = load_saved_data()
    fresh_model = make_a_model(9, 300, 25, 1)
    discounted = pong.discount_rewards(raw_rewards, 0.95)
    agent = train_model(fresh_model, frames, actions, discounted, 5, 100)

    # Demo run only: show the moves, do not record them.
    pong.play_games(10, 21, single_player=True, show_moves=True, game_agent=agent, save_moves=False)


#load_data_make_train_play()
#make_train_and_return_model()
#main()
# frame_stack, moves, rewards=load_saved_data()
# print(len(moves))
# print(frame_stack[0])
# print(frame_stack[100])
# print(frame_stack[200])
# print(frame_stack[300])
# print(frame_stack[400])
# # print(frame_stack[5])
# # print(frame_stack[6])
# # print(frame_stack[7])
# # print(frame_stack[8])
# # print(frame_stack[9])
# # print(frame_stack[10])
# print(moves[0:200])
# # print(len([x for x in rewards if x==1]))
예제 #2
0
def make_train_and_return_model():
    """Generate play data, train a new model on it, save it and return it.

    Plays 5000 games without an agent (moves saved, nothing shown) to collect
    frames, moves and rewards, trains a freshly built model on that data,
    saves it as "model_saved" and then lets it play 10 games.

    Returns:
        The trained model, as the function name promises (the previous
        version fell off the end and returned None).
    """
    frame_stack, moves, rewards = pong.play_games(5000, 21, show_moves=False, save_moves=True)
    model = make_a_model(4, 50, 10, 1)
    model = train_model(model, frame_stack, moves, rewards, 10, 200)
    model.save("model_saved")
    pong.play_games(10, 20, game_agent=model)
    return model
예제 #3
0
def main_train_evolve():
    """Evolve the model weights by random perturbation and rank-weighted updates.

    Each generation draws ``n_trials`` Gaussian noise vectors, plays one game
    with the weights ``theta + noise`` for each of them, ranks the summed
    rewards with ``centered_ranker`` and moves ``theta`` along the
    rank-weighted combination of the noise vectors. The model is saved after
    every generation as ``model_saved<generation>``.
    """
    my_model = make_a_model(3, 64, 32)  # the main model
    theta = get_model_weights(my_model)  # the main weight vector

    generations = 500  # used instead of epochs/episodes
    n_trials = 50
    std_dev = 0.02  # drop this guy
    lr = 0.1  # play with this

    for generation in range(generations):
        print(f"Season {generation}")
        perturbations = []
        scores = []

        for trial in range(n_trials):
            if trial % 20 == 0:
                print(trial)
            delta = np.random.randn(len(theta)) * std_dev
            perturbations.append(delta)
            # TODO: also try the negated noise to double the search space.
            set_model_weights(my_model, delta + theta)
            _, _, rewards = pong.play_games(
                1,
                1,
                game_agent=my_model,
                save_moves=False,
                use_turtle=False,
                show_moves=False,
            )
            scores.append(sum(rewards))

        ranked_fitness = centered_ranker(np.array(scores, dtype=float))
        print(f"Max fitness {max(scores)} Average fitness {np.average(scores)}")

        # Rank-weighted sum of the noise vectors gives the update direction.
        update_direction = np.dot(ranked_fitness, perturbations)
        theta += update_direction * lr
        set_model_weights(my_model, theta)
        my_model.save(f"model_saved{generation}")

    print("awe")
예제 #4
0
def load_model_train_on_play_data_return_results(model_name, games_to_play=100):
    """Load a saved model, train it on saved play data and return new results.

    Args:
        model_name: Name/path of the saved model to load. Previously ignored
            in favour of the hard-coded "model_saved".
        games_to_play: Number of evaluation games to play after training.
            Previously ignored in favour of a hard-coded 10.

    Returns:
        The ``(frames, moves, rewards)`` tuple produced by the evaluation
        games played with the retrained model.
    """
    model = load_model(model_name)
    frames, moves, rewards = load_saved_data()
    model = train_model(model, frames, moves, rewards, 10, 3)
    # Evaluation run: nothing shown, nothing saved.
    return pong.play_games(games_to_play, 10, show_moves=False, save_moves=False,
                           game_agent=model)
예제 #5
0
def main():
    """Play a batch of games without showing the moves.

    Runs 2000 games of 21 points each through ``pong.play_games``. The
    returned frames, moves and rewards are not used here; the call is made
    for whatever side effects ``play_games`` has with its default settings.
    """
    games_to_play = 2000
    points_per_game = 21
    # The unused locals (play_random, decision_object, single_player) were
    # never passed to play_games and have been removed.
    pong.play_games(games_to_play,
                    points_per_game,
                    show_moves=False)
예제 #6
0
def load_data_make_train_play():
    """Fit a new model to the saved play data and show it playing.

    The saved rewards are discounted with a factor of 0.95 before training.
    After training, the model plays 10 single-player games with moves shown
    and nothing saved.
    """
    saved_frames, saved_moves, saved_rewards = load_saved_data()
    untrained = make_a_model(4, 300, 25, 1)
    saved_rewards = pong.discount_rewards(saved_rewards, 0.95)
    trained_model = train_model(untrained, saved_frames, saved_moves, saved_rewards, 5, 100)
    pong.play_games(
        10,
        21,
        single_player=True,
        show_moves=True,
        game_agent=trained_model,
        save_moves=False,
    )
예제 #7
0
def make_model_play(model_name, games, show_moves, save_moves=False, ball_speed=10):
    """Load a saved model and let it play a number of games.

    Args:
        model_name: Name/path of the saved model to load.
        games: Number of games to play.
        show_moves: Forwarded to ``pong.play_games``.
        save_moves: Forwarded to ``pong.play_games``.
        ball_speed: Forwarded to ``pong.play_games``. Previously this
            argument was ignored and a hard-coded 100 was always used.

    Returns:
        The ``(frames, moves, rewards)`` tuple from ``pong.play_games``.
        The sum of the rewards is also printed.
    """
    model = load_model(model_name)
    frames, moves, rewards = pong.play_games(games, 10, show_moves=show_moves, save_moves=save_moves,
                                             game_agent=model, ball_speed=ball_speed, use_turtle=True)
    print(sum(rewards))
    return frames, moves, rewards
예제 #8
0
def generate_random_moves_and_save():
    """Play 10000 agent-less games with saving enabled and report the frame count.

    No game agent is supplied, moves are saved, nothing is shown and turtle
    is disabled. Prints the number of collected frames.
    """
    play_options = {
        "single_player": True,
        "show_moves": False,
        "game_agent": None,
        "save_moves": True,
        "use_turtle": False,
    }
    collected_frames, _moves, _rewards = pong.play_games(10000, 21, **play_options)
    print(len(collected_frames))