def load_data_make_train_play():
    """Load previously saved gameplay data, train a model on it, then demo it.

    Trains on the saved frames/moves with discounted rewards, then plays
    10 visible single-player games with the trained agent (moves not re-saved).
    """
    # NOTE(review): a later definition in this file reuses this exact name
    # (with input size 4 instead of 9) and shadows this one at import time —
    # confirm which input size matches the saved frame data.
    frame_stack, moves, rewards = load_saved_data()
    model = make_a_model(9, 300, 25, 1)
    # Discount so earlier frames in a rally share credit for the eventual point.
    rewards = pong.discount_rewards(rewards, 0.95)
    trained_model = train_model(model, frame_stack, moves, rewards, 5, 100)
    # Watch the trained agent play; do not overwrite the saved training data.
    pong.play_games(10, 21, single_player=True, show_moves=True,
                    game_agent=trained_model, save_moves=False)
def make_train_and_return_model():
    """Generate self-play data, train a fresh model on it, save and return it.

    Plays 5000 games of 21 points to collect frames/moves/rewards, trains a
    new model for 10 epochs (batch size 200), saves it as "model_saved",
    runs a quick 10-game sanity check with the agent, and returns the model.
    """
    frame_stack, moves, rewards = pong.play_games(5000, 21,
                                                  show_moves=False,
                                                  save_moves=True)
    model = make_a_model(4, 50, 10, 1)
    model = train_model(model, frame_stack, moves, rewards, 10, 200)
    model.save("model_saved")
    # Quick sanity run with the freshly trained agent.
    pong.play_games(10, 20, game_agent=model)
    # Fix: the function name promises the model is returned; it previously
    # fell off the end and returned None.
    return model
def main_train_evolve(): my_model = make_a_model(3, 64, 32) # my main model theta = get_model_weights(my_model) # my main weights std_dev = 0.02 #drop this guy lr = 0.1 #play with this generations = 500 # instaed of epoch/episode n_trials = 50 for generation in range(generations): print(f"Season {generation}") noise = [] fitness = [] for n in range(n_trials): if (n % 20 == 0): print(n) noise.append(np.random.randn(len(theta)) * std_dev) set_model_weights(my_model, noise[-1] + theta) #make negative, double the search space frames, moves, rewards = pong.play_games(1, 1, game_agent=my_model, save_moves=False, use_turtle=False, show_moves=False) fitness.append(sum(rewards)) # print(n, fitness, noise[-1]) ranked_fittness = centered_ranker(np.array(fitness, dtype=float)) print(f"Max fitness {max(fitness)} Average fitness {np.average(fitness)}") weighted_average_noise = np.dot(ranked_fittness, noise) theta += weighted_average_noise * lr set_model_weights(my_model, theta) my_model.save("model_saved" + str(generation)) print("awe")
def load_model_train_on_play_data_return_results(model_name, games_to_play=100):
    """Load a saved model, fine-tune it on saved play data, then evaluate it.

    Args:
        model_name: name/path of the saved model to load.
        games_to_play: number of evaluation games to play after training.

    Returns:
        (frames, moves, rewards) from the evaluation games.
    """
    # Fix: was hard-coded to load_model("model_saved"), ignoring model_name.
    model = load_model(model_name)
    frames, moves, rewards = load_saved_data()
    model = train_model(model, frames, moves, rewards, 10, 3)
    # Fix: was hard-coded to play 10 games, ignoring games_to_play.
    frames, moves, rewards = pong.play_games(games_to_play, 10,
                                             show_moves=False,
                                             save_moves=False,
                                             game_agent=model)
    return frames, moves, rewards
def main():
    """Play a batch of games with no trained agent (random/default play)."""
    games_to_play = 2000
    points_per_game = 21
    # Removed dead locals play_random / decision_object / single_player and
    # the unused unpacking of the results — none were referenced.
    # NOTE(review): the original set single_player=True but never passed it
    # to play_games — confirm whether single-player mode was intended here.
    pong.play_games(games_to_play, points_per_game, show_moves=False)
def load_data_make_train_play():
    """Train a model on previously recorded gameplay and show it playing.

    Loads the saved frames/moves/rewards, applies reward discounting,
    trains a new 4-input model, and then runs 10 visible single-player
    games with the trained agent (moves are not re-saved).
    """
    # NOTE: this redefines a function of the same name earlier in the file
    # (which used input size 9); at import time this definition wins.
    frames, actions, raw_rewards = load_saved_data()
    shaped_rewards = pong.discount_rewards(raw_rewards, 0.95)
    net = make_a_model(4, 300, 25, 1)
    fitted = train_model(net, frames, actions, shaped_rewards, 5, 100)
    pong.play_games(
        10,
        21,
        single_player=True,
        show_moves=True,
        game_agent=fitted,
        save_moves=False,
    )
def make_model_play(model_name, games, show_moves, save_moves=False, ball_speed=10):
    """Load a saved model and let it play, printing the total reward.

    Args:
        model_name: name/path of the saved model to load.
        games: number of games to play (10 points each).
        show_moves: whether to render the games.
        save_moves: whether to persist the generated moves.
        ball_speed: ball speed passed through to the game.

    Returns:
        (frames, moves, rewards) from the played games.
    """
    model = load_model(model_name)
    # Fix: ball_speed parameter was ignored — the call hard-coded
    # ball_speed=100 regardless of the argument.
    frames, moves, rewards = pong.play_games(games, 10,
                                             show_moves=show_moves,
                                             save_moves=save_moves,
                                             game_agent=model,
                                             ball_speed=ball_speed,
                                             use_turtle=True)
    print(sum(rewards))
    return frames, moves, rewards
def generate_random_moves_and_save():
    """Play 10000 headless single-player games with no agent and save the moves.

    Prints the number of recorded frames when done.
    """
    frames, actions, rewards = pong.play_games(
        10000,
        21,
        single_player=True,
        show_moves=False,
        game_agent=None,
        save_moves=True,
        use_turtle=False,
    )
    print(len(frames))