def train_agent():
    """Run the training loop for the snake-game agent indefinitely.

    Repeatedly observes the game state, picks an action, applies it, and
    trains the agent (short-term after every step, long-term / experience
    replay after every finished game). Tracks the best score seen so far
    and saves the model whenever a new record is reached. Per-game scores
    and the running mean score are handed to the ``plot`` helper after
    each game for live visualization.

    Never returns; intended to be interrupted by the user.
    """
    plot_scores = []       # score of each completed game, in order
    plot_mean_scores = []  # running mean score after each game
    total_score = 0
    record = 0             # best score observed so far
    agent = Agent()
    game = SnakeGame()
    while True:
        # Observe current state and choose an action.
        state_old = agent.get_state(game)
        final_move = agent.get_action(state_old)

        # Apply the action; observe reward, terminal flag, and new state.
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # One-step (online) training on the transition just experienced.
        agent.train_short_memory(state_old, final_move, reward, state_new, done)
        # Store the transition for experience replay.
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            # Game over: reset environment, replay-train on stored memory.
            game.reset()
            agent.num_games += 1
            agent.train_long_memory()

            if score > record:
                record = score
                agent.model.save()  # checkpoint only on new best score

            print(f'Game: {agent.num_games}, Score: {score}, Record: {record}')

            plot_scores.append(score)
            total_score += score
            mean_score = total_score / agent.num_games
            plot_mean_scores.append(mean_score)
            # BUG FIX: original called plot_scores(...) — invoking the list
            # itself, a TypeError. The intended call is the plotting helper.
            plot(plot_scores, plot_mean_scores)
if not model: model = neural_network_model(input_size = len(X[0])) model.fit({'input': X}, {'targets': y}, n_epoch=7, snapshot_step=500, show_metric=True, run_id='openai_learning') return model training_data = initial_population() model = train_model(training_data) scores = [] choices = [] for each_game in range(10): score = 0 game_memory = [] game.reset( True ) win = game.getWindow() alive = True prev_obs = [] new_observation = [] while(True): win.getch() if len(prev_obs)==0: action = game.sample() else: action = np.argmax(model.predict(prev_obs.reshape(-1,len(prev_obs),1))[0]) choices.append(action) new_observation, reward, alive = game.step(action) if not alive: