def make_action(self):
    if self.state.menu == Menu.Game.value:
        if self.action_counter % self.act_every == 0:
            for agent, pad in zip(self.agents, self.pads):
                agent.act(self.state, pad)
            if self.dump:
                self.dump_state()
        self.action_counter += 1
    elif self.state.menu in [menu.value for menu in [Menu.Characters, Menu.Stages]]:
        # FIXME: this is very convoluted
        done = True
        for mm in self.menu_managers:
            if not mm.reached:
                done = False
            mm.move(self.state)
        if done:
            if self.settings_mm.reached:
                self.movie.play(self.pads[0])
            else:
                self.settings_mm.move(self.state)
    elif self.state.menu == Menu.PostGame.value:
        self.spam(Button.START)
    else:
        print("Weird menu state", self.state.menu)
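# `spam` is not shown above; a hypothetical sketch of the idea (the pad method
# names are assumptions, not the project's actual API): alternate press and
# release on successive calls so the menu registers repeated START taps rather
# than one held press.
class ButtonSpammer:
    def __init__(self, pad):
        self.pad = pad
        self.pressed = False

    def spam(self, button):
        if self.pressed:
            self.pad.release_button(button)
        else:
            self.pad.press_button(button)
        self.pressed = not self.pressed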
def update(self):
    # Update the energy of the trees.
    for tree in self.trees:
        tree.update()
    # Let each agent act on a view that lists every agent except itself.
    for agent in self.agents:
        agents = [ag for ag in self.agents if ag != agent]
        self.objects["agents"] = agents
        agent.act(self.objects)
    # Store the new data.
    self.objects["trees"] = self.trees
    self.objects["agents"] = self.agents
def play_games(self, agent: agent.AbstractAgent, buffer: replay_buffer.DataContainer):
    for _ in range(self.config.game_count_per_iteration):
        t = 0
        env = environment.Env()
        T = replay_buffer.Trajectory()
        while not env.terminate():
            state = env.get_state()
            action = agent.act(state, t % 2)  # alternate between the two players
            env.step(action)
            env.render()
            T.add(state, action)
            t += 1
        T.result = env.result()
        buffer.save_game(T)
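# The loop relies on Trajectory and DataContainer from replay_buffer; a minimal
# sketch of what they might provide (the source's actual classes are not shown):
class Trajectory:
    def __init__(self):
        self.states, self.actions = [], []
        self.result = None  # filled in once the game ends

    def add(self, state, action):
        self.states.append(state)
        self.actions.append(action)

class DataContainer:
    def __init__(self):
        self.games = []

    def save_game(self, trajectory):
        self.games.append(trajectory)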
def game(self, max_iter, g):
    rewards = []
    for agent, env in zip(self.agents, self.environments):
        env.reset(g)
        agent.reset(g)
        game_reward = 0
        for i in range(1, max_iter + 1):
            observation = env.observe()
            action = agent.act(observation)
            reward, stop = env.act(action)
            agent.reward(observation, action, reward)
            game_reward += reward
            if stop == "stop":
                break
        rewards.append(game_reward)
    return sum(rewards) / len(rewards)
def run_episode(self, agent):
    self.reward = 0
    s = self.env.reset()
    done = False
    while not done:
        self.env.render()
        a = agent.act(s)
        s_, r, done, _ = self.env.step(a)
        agent.learn((s, a, s_, r, done))
        self.reward += r
        s = s_
    self.episode_count += 1
    self.reward_buffer.append(self.reward)
    average = sum(self.reward_buffer) / len(self.reward_buffer)
    print("Episode Nr. {}\nScore: {}\nAverage: {}".format(
        self.episode_count, self.reward, average))
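# The runner above leans on fields initialised elsewhere; a minimal setup sketch
# (the names and the 100-episode window are assumptions), using the classic
# 4-tuple gym step API that the loop expects:
from collections import deque
import gym

class Runner:
    def __init__(self, env_name="CartPole-v1"):
        self.env = gym.make(env_name)
        self.episode_count = 0
        self.reward_buffer = deque(maxlen=100)  # average over recent episodes only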
def show_results(env, brain_name, agent):
    # Load the trained weights from file.
    agent.qnetwork_local.load_state_dict(
        torch.load('./results/checkpoint_dqn_vec.pth',
                   map_location=lambda storage, loc: storage))
    with torch.no_grad():
        for i in range(3):
            score = 0
            env_info = env.reset(train_mode=False)[brain_name]
            state = env_info.vector_observations[0]
            while True:
                action = agent.act(state)
                env_info = env.step(action)[brain_name]
                next_state = env_info.vector_observations[0]  # get the next state
                reward = env_info.rewards[0]                  # get the reward
                done = env_info.local_done[0]                 # see if the episode has finished
                state = next_state
                score += reward
                if done:
                    break
            print('episode %d got score %d' % (i, score))
    env.close()
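# Usage sketch, assuming the Unity ML-Agents (unityagents) API that the
# brain_name / vector_observations fields imply; the app file name is a
# placeholder and `agent` is a trained DQN agent created elsewhere:
from unityagents import UnityEnvironment

env = UnityEnvironment(file_name="Banana.app")
brain_name = env.brain_names[0]
show_results(env, brain_name, agent)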
def best_action():
    observation = Struct(**request.json["observation"])
    configuration = Struct(**request.json["configuration"])
    return {"best_action": act(observation, configuration)}
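# The endpoint above assumes a `Struct` helper and Flask's `request`; a minimal
# sketch of the missing wiring (the class body follows the common
# dict-to-attributes idiom and is an assumption, not the source's code):
from flask import Flask, request

app = Flask(__name__)

class Struct:
    """Expose dict keys as attributes (observation.board, configuration.columns, ...)."""
    def __init__(self, **entries):
        self.__dict__.update(entries)

# Hypothetical registration; the real route is not shown in the source:
# app.route("/best_action", methods=["POST"])(best_action)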
for e in range(episode_count):
    # At each episode, reset the environment to its starting position.
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    score = 0
    done = False
    rewards.append(0.0)
    firststeps = 20
    while not done and (score < max_score if max_score else True):
        # Show game graphics.
        env.render()
        # Select an action, observe the environment, and calculate the reward.
        action = agent.act(state)
        u = calculate_u(agent.action_space, action)
        next_state, reward, done, _ = env.step([u])
        next_state = np.reshape(next_state, [1, state_size])
        if firststeps > 0:
            firststeps -= 1
        else:
            # Angle of the (cos, sin) state pair, in degrees.
            angle = np.angle([next_state[0][0] + 1j * next_state[0][1]], True)
            score += abs(angle[0])
        # Save the experience and update the current state.
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        rewards[-1] += reward
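# `calculate_u` is not defined in the snippet; one plausible reading, given the
# discrete action index and continuous control signal above, is a discretizer
# mapping the index onto an even grid of torques (a sketch, not the source's code):
import numpy as np

def calculate_u(action_space, action, n_bins=9):
    # Evenly spaced torques across the continuous Box range; n_bins is an assumption.
    grid = np.linspace(action_space.low[0], action_space.high[0], n_bins)
    return grid[action]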
def play(agent, opt, random_action=False):
    filter_examine_cmd = False
    infos_to_request = agent.infos_to_request
    infos_to_request.max_score = True  # Needed to normalize the scores.

    game_path = opt.game_dir + "/" + (
        str(opt.difficulty_level) + "/" + opt.mode
        if opt.difficulty_level != ''
        else opt.game_dir + "/" + opt.mode)

    manual_world_graphs = {}
    if opt.graph_emb_type and 'world' in opt.graph_type:
        print("Loading Knowledge Graph ... ", end='')
        agent.kg_graph, _, _ = construct_kg(game_path + '/conceptnet_subgraph.txt')
        print(' DONE')
        # Optional: use the complete or brief manually extracted ConceptNet subgraph for the agent.
        print("Loading Manual World Graphs ... ", end='')
        manual_world_graphs = load_manual_graphs(game_path + '/manual_subgraph_brief')

    if opt.game_name:
        game_path = game_path + "/" + opt.game_name

    env, game_file_names = dataset.get_game_env(game_path, infos_to_request,
                                                opt.max_step_per_episode, opt.batch_size,
                                                opt.mode, opt.verbose)
    # Get goals as graphs.
    goal_graphs = {}
    for game_file in env.gamefiles:
        goal_graph = get_goal_graph(game_file)
        if goal_graph:
            game_id = game_file.split('-')[-1].split('.')[0]
            goal_graphs[game_id] = goal_graph

    # Collect some statistics: nb_steps, final reward.
    total_games_count = len(game_file_names)
    game_identifiers, avg_moves, avg_scores, avg_norm_scores, max_poss_scores = [], [], [], [], []

    for no_episode in range(opt.nepisodes):
        if not random_action:
            random.seed(no_episode)
            np.random.seed(no_episode)
            torch.manual_seed(no_episode)
            if torch.cuda.is_available():
                torch.cuda.manual_seed(no_episode)
            env.seed(no_episode)

        agent.start_episode(opt.batch_size)
        avg_eps_moves, avg_eps_scores, avg_eps_norm_scores = [], [], []
        num_games = total_games_count
        game_max_scores = []
        game_names = []
        while num_games > 0:
            obs, infos = env.reset()  # Start a new episode.
            if filter_examine_cmd:
                # e.g. ["open refrigerator", "take apple from refrigerator"]
                for commands_ in infos["admissible_commands"]:
                    for cmd_ in [cmd for cmd in commands_ if cmd.split()[0] in ["examine", "look"]]:
                        commands_.remove(cmd_)

            batch_size = len(obs)
            num_games -= len(obs)
            game_goal_graphs = [None] * batch_size
            max_scores = []
            game_ids = []
            game_manual_world_graph = [None] * batch_size
            for b, game in enumerate(infos["game"]):
                max_scores.append(game.max_score)
                if "uuid" in game.metadata:
                    game_id = game.metadata["uuid"].split("-")[-1]
                    game_ids.append(game_id)
                    game_names.append(game_id)
                    game_max_scores.append(game.max_score)
                    if len(goal_graphs):
                        game_goal_graphs[b] = goal_graphs[game_id]
                    if len(manual_world_graphs):
                        game_manual_world_graph[b] = manual_world_graphs[game_id]

            if not game_ids:
                game_ids = range(num_games, num_games + batch_size)
                game_names.extend(game_ids)

            commands = ["restart"] * len(obs)
            scored_commands = [[] for b in range(batch_size)]
            last_scores = [0.0] * len(obs)
            scores = [0.0] * len(obs)
            dones = [False] * len(obs)
            nb_moves = [0] * len(obs)
            infos["goal_graph"] = game_goal_graphs
            infos["manual_world_graph"] = game_manual_world_graph
            agent.reset_parameters(opt.batch_size)
            for step_no in range(opt.max_step_per_episode):
                nb_moves = [step + int(not done) for step, done in zip(nb_moves, dones)]

                if agent.graph_emb_type and ('local' in agent.graph_type or 'world' in agent.graph_type):
                    agent.update_current_graph(obs, commands, scored_commands, infos, opt.graph_mode)

                commands = agent.act(obs, scores, dones, infos, scored_commands, random_action)
                obs, scores, dones, infos = env.step(commands)
                infos["goal_graph"] = game_goal_graphs
                infos["manual_world_graph"] = game_manual_world_graph

                for b in range(batch_size):
                    if scores[b] - last_scores[b] > 0:
                        last_scores[b] = scores[b]
                        scored_commands[b].append(commands[b])

                if all(dones):
                    break
                if step_no == opt.max_step_per_episode - 1:
                    dones = [True for _ in dones]

            agent.act(obs, scores, dones, infos, scored_commands, random_action)  # Let the agent know the game is done.

            if opt.verbose:
                print(".", end="")
            avg_eps_moves.extend(nb_moves)
            avg_eps_scores.extend(scores)
            avg_eps_norm_scores.extend([score / max_score for score, max_score
                                        in zip(scores, max_scores)])
        if opt.verbose:
            print("*", end="")
        agent.end_episode()
        game_identifiers.append(game_names)
        avg_moves.append(avg_eps_moves)  # episode x # games
        avg_scores.append(avg_eps_scores)
        avg_norm_scores.append(avg_eps_norm_scores)
        max_poss_scores.append(game_max_scores)
    env.close()

    game_identifiers = np.array(game_identifiers)
    avg_moves = np.array(avg_moves)
    avg_scores = np.array(avg_scores)
    avg_norm_scores = np.array(avg_norm_scores)
    max_poss_scores = np.array(max_poss_scores)
    if opt.verbose:
        idx = np.apply_along_axis(np.argsort, axis=1, arr=game_identifiers)
        game_avg_moves = np.mean(np.array(list(map(lambda x, y: y[x], idx, avg_moves))), axis=0)
        game_norm_scores = np.mean(np.array(list(map(lambda x, y: y[x], idx, avg_norm_scores))), axis=0)
        game_avg_scores = np.mean(np.array(list(map(lambda x, y: y[x], idx, avg_scores))), axis=0)
        msg = "\nGame Stats:\n-----------\n" + "\n".join(
            "  Game_#{} = Score: {:5.2f} Norm_Score: {:5.2f} Moves: {:5.2f}/{}".format(
                game_no, avg_score, norm_score, avg_move, opt.max_step_per_episode)
            for game_no, (norm_score, avg_score, avg_move) in
            enumerate(zip(game_norm_scores, game_avg_scores, game_avg_moves)))
        print(msg)

        total_avg_moves = np.mean(game_avg_moves)
        total_avg_scores = np.mean(game_avg_scores)
        total_norm_scores = np.mean(game_norm_scores)
        msg = opt.mode + " stats: avg. score: {:4.2f}; norm. avg. score: {:4.2f}; avg. steps: {:5.2f}; \n"
        print(msg.format(total_avg_scores, total_norm_scores, total_avg_moves))

    ## Dump log files
    str_result = {opt.mode + 'game_ids': game_identifiers,
                  opt.mode + 'max_scores': max_poss_scores,
                  opt.mode + 'scores_runs': avg_scores,
                  opt.mode + 'norm_score_runs': avg_norm_scores,
                  opt.mode + 'moves_runs': avg_moves}

    results_ofile = getUniqueFileHandler(opt.results_filename + '_' + opt.mode + '_results')
    pickle.dump(str_result, results_ofile)
    return avg_scores, avg_norm_scores, avg_moves
# (tail of a helper whose signature is truncated in the source)
    new_state = queryStateAction(state_str, action)
    return json.loads(new_state)


if __name__ == "__main__":
    import json
    import os
    import sys

    # import mcts_agent as agent  # alternative agent implementation
    import agent

    i = 0
    line = "start"
    while line != "":
        line = sys.stdin.readline()
        if i == 0:
            # The first line carries the action map used to initialise the agent.
            action_map = json.loads(line)
            agent.initialise(action_map)
            sys.stdout.write("INIT_DONE" + os.linesep)
            sys.stdout.flush()
        elif line != "":
            # Subsequent lines carry the state; reply with the chosen action.
            state_map = json.loads(line)
            action = agent.act(state_map)
            action_map = {"action": action}
            sys.stdout.write("0" + json.dumps(action_map) + os.linesep)
            sys.stdout.flush()
        i += 1
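# Sketch of the other end of this line protocol (the script name is a
# placeholder): the parent sends the action map once, waits for INIT_DONE,
# then streams JSON states and reads back JSON actions.
import json
import subprocess

proc = subprocess.Popen(["python", "runner.py"], stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE, text=True)
proc.stdin.write(json.dumps({"actions": ["up", "down", "left", "right"]}) + "\n")
proc.stdin.flush()
assert proc.stdout.readline().strip() == "INIT_DONE"
proc.stdin.write(json.dumps({"state": {}}) + "\n")
proc.stdin.flush()
reply = proc.stdout.readline().strip()  # e.g. '0{"action": "up"}'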
def run_episode(self, agent):
    self.reward = 0
    s = self.env.reset()
    done = False
    step = 0
    r = 0
    actions = np.zeros(5)
    while not done:
        step += 1
        a = agent.act(s)
        # Shape the reward per action type and count how often each action is taken.
        if a == 0:
            actions[0] += 1
            r -= 1
        elif a == 1:
            actions[1] += 1
            r += 5
        elif a == 2:
            actions[2] += 1
            r += 5
        elif a == 3:
            actions[3] += 1
            r += 1
        elif a == 4:
            actions[4] += 1
            r += 1
        index, s_, price, gain, terminal, money = self.env.step(a)
        gain = gain if not terminal else 0
        if terminal:
            r -= 4000
            print("step: " + str(step) + " money: " + str(money),
                  " rewards: " + str(r), " action", actions)
            self.steps.append(step)
            self.wallet.append(money)
            self.rewards.append(r)
            done = True
        elif step > 3300:
            if money > 3000:
                r += 5000
            print("step: " + str(step) + " money: " + str(money),
                  " rewards: " + str(r), " action", actions)
            self.steps.append(step)
            self.wallet.append(money)
            self.rewards.append(r)
            done = True
        if gain > 0:
            r += 200
        if money > 3000:
            r += 15
        r += 1
        agent.learn((s, a, s_, r, terminal))
        self.reward += r
        s = s_
    self.episode_count += 1
    self.reward_buffer.append(self.reward)
    average = sum(self.reward_buffer) / len(self.reward_buffer)
    print("Episode Nr. {}\nScore: {}\nAverage: {}".format(
        self.episode_count, self.reward, average))