Example #1
    def make_action(self):
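        # Dispatch on the current menu state: act in-game, drive the character
        # and stage selection menus, or mash START past the post-game screen.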
        # menu = Menu(self.state.menu)
        # print(menu)
        if self.state.menu == Menu.Game.value:
            if self.action_counter % self.act_every == 0:
                for agent, pad in zip(self.agents, self.pads):
                    agent.act(self.state, pad)
                if self.dump:
                    self.dump_state()
            self.action_counter += 1
            #self.fox.advance(self.state, self.pad)

        elif self.state.menu in (Menu.Characters.value, Menu.Stages.value):
            # FIXME: this is very convoluted
            done = True
            for mm in self.menu_managers:
                if not mm.reached:
                    done = False
                mm.move(self.state)

            if done:
                if self.settings_mm.reached:
                    self.movie.play(self.pads[0])
                else:
                    self.settings_mm.move(self.state)
        elif self.state.menu == Menu.PostGame.value:
            self.spam(Button.START)
        else:
            print("Weird menu state", self.state.menu)
Example #2
    def update(self):
        # Update the energy of the trees.
        for tree in self.trees:
            tree.update()
        # print("trees updated")
        # Let each agent act on the shared objects, seeing every agent but itself.
        for agent in self.agents:
            agents = [ag for ag in self.agents if ag != agent]
            self.objects["agents"] = agents
            agent.act(self.objects)
        # Store the new data.
        self.objects["trees"] = self.trees
        self.objects["agents"] = self.agents
Example #3
    def play_games(self, agent: agent.AbstractAgent, buffer: replay_buffer.DataContainer):
        # Self-play: collect one trajectory per game and store it in the replay buffer.
        for _ in range(self.config.game_count_per_iteration):
            t = 0
            env = environment.Env()
            trajectory = replay_buffer.Trajectory()
            while not env.terminate():
                state = env.get_state()
                # The second argument (t % 2) alternates the acting player each turn.
                action = agent.act(state, t % 2)
                env.step(action)
                env.render()
                trajectory.add(state, action)
                t += 1
            trajectory.result = env.result()
            buffer.save_game(trajectory)
Example #4
    def game(self, max_iter, g):
        # Play one game per (agent, environment) pair and return the mean total reward.
        rewards = []
        for (agent, env) in zip(self.agents, self.environments):
            env.reset(g)
            agent.reset(g)
            game_reward = 0
            for i in range(1, max_iter + 1):
                observation = env.observe()
                action = agent.act(observation)
                (reward, stop) = env.act(action)
                agent.reward(observation, action, reward)
                game_reward += reward
                if stop == "stop":
                    break
            rewards.append(game_reward)
        return sum(rewards) / len(rewards)
Example #5
    def run_episode(self, agent):
        self.reward = 0
        s = self.env.reset()
        done = False
        while not done:
            self.env.render()
            a = agent.act(s)
            s_, r, done, _ = self.env.step(a)
            agent.learn((s, a, s_, r, done))
            self.reward += r
            s = s_

        self.episode_count += 1
        self.reward_buffer.append(self.reward)
        average = sum(self.reward_buffer) / len(self.reward_buffer)

        print("Episode Nr. {} \nScore: {} \nAverage: {}".format(
            self.episode_count, self.reward, average))
Example #6
def show_results(env, brain_name, agent):
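    # Run three evaluation episodes with the restored agent and print each score.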
    # load the weights from file
    agent.qnetwork_local.load_state_dict(
        torch.load('./results/checkpoint_dqn_vec.pth',
                   map_location=lambda storage, loc: storage))
    with torch.no_grad():
        for i in range(3):
            score = 0
            env_info = env.reset(train_mode=False)[brain_name]
            state = env_info.vector_observations[0]
            while True:
                action = agent.act(state)
                env_info = env.step(action)[brain_name]
                next_state = env_info.vector_observations[0]  # get the next state
                reward = env_info.rewards[0]  # get the reward
                done = env_info.local_done[0]  # see if episode has finished
                state = next_state
                score += reward
                if done:
                    break
            print('episode %d: score %d' % (i, score))
    env.close()
Example #7
def best_action():
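    # Wrap the observation and configuration from the JSON request body into
    # simple objects and return the action chosen by act().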

    observation = Struct(**request.json["observation"])
    configuration = Struct(**request.json["configuration"])

    return {"best_action": act(observation, configuration)}
Example #8
    for e in range(episode_count):
        # at each episode, reset environment to starting position
        state = env.reset()
        state = np.reshape(state, [1, state_size])
        score = 0

        done = False
        rewards.append(0.0)
        firststeps = 20  # initial steps to skip before the angle-based score is accumulated

        while not done and (not max_score or score < max_score):
            # show game graphics
            env.render()

            # select action, observe environment, calculate reward
            action = agent.act(state)
            u = calculate_u(agent.action_space, action)
            next_state, reward, done, _ = env.step([u])
            next_state = np.reshape(next_state, [1, state_size])

            if firststeps > 0:
                firststeps -= 1
            else:
                # Convert the first two state components into an angle (in degrees)
                # and add its magnitude to the score.
                angle = np.angle([next_state[0][0] + 1j * next_state[0][1]],
                                 deg=True)
                score += abs(angle[0])

            # save experience and update current state
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            rewards[-1] += reward
Example #9
def play(agent, opt, random_action=False):
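    # Evaluation driver: optionally load knowledge and goal graphs, then play every
    # game in the dataset for opt.nepisodes episodes while tracking moves and
    # (normalized) scores per game.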
    filter_examine_cmd = False
    infos_to_request = agent.infos_to_request
    infos_to_request.max_score = True  # Needed to normalize the scores.
    game_path = opt.game_dir + "/" + (
        str(opt.difficulty_level) + "/" + opt.mode
        if opt.difficulty_level != '' else opt.game_dir + "/" + opt.mode)
    manual_world_graphs = {}
    if opt.graph_emb_type and 'world' in opt.graph_type:
        print("Loading Knowledge Graph ... ", end='')
        agent.kg_graph, _, _ = construct_kg(game_path + '/conceptnet_subgraph.txt')
        print(' DONE')
        # optional: Use complete or brief manually extracted conceptnet subgraph for the agent
        print("Loading Manual World Graphs ... ", end='')
        manual_world_graphs = load_manual_graphs(game_path + '/manual_subgraph_brief')

    if opt.game_name:
        game_path = game_path + "/" + opt.game_name

    env, game_file_names = dataset.get_game_env(game_path, infos_to_request, opt.max_step_per_episode, opt.batch_size,
                                                opt.mode, opt.verbose)
    # Get Goals as graphs
    goal_graphs = {}
    for game_file in env.gamefiles:
        goal_graph = get_goal_graph(game_file)
        if goal_graph:
            game_id = game_file.split('-')[-1].split('.')[0]
            goal_graphs[game_id] = goal_graph

    # Collect some statistics: nb_steps, final reward.
    total_games_count = len(game_file_names)
    game_identifiers, avg_moves, avg_scores, avg_norm_scores, max_poss_scores = [], [], [], [], []

    for no_episode in range(opt.nepisodes):
        if not random_action:
            random.seed(no_episode)
            np.random.seed(no_episode)
            torch.manual_seed(no_episode)
            if torch.cuda.is_available():
                torch.cuda.manual_seed(no_episode)
            env.seed(no_episode)

        agent.start_episode(opt.batch_size)
        avg_eps_moves, avg_eps_scores, avg_eps_norm_scores = [], [], []
        num_games = total_games_count
        game_max_scores = []
        game_names = []
        while num_games > 0:
            obs, infos = env.reset()  # Start new episode.
            if filter_examine_cmd:
                for commands_ in infos["admissible_commands"]: # [open refri, take apple from refrigeration]
                    for cmd_ in [cmd for cmd in commands_ if cmd.split()[0] in ["examine", "look"]]:
                        commands_.remove(cmd_)

            batch_size = len(obs)
            num_games -= len(obs)
            game_goal_graphs = [None] * batch_size
            max_scores = []
            game_ids = []
            game_manual_world_graph = [None] * batch_size
            for b, game in enumerate(infos["game"]):
                max_scores.append(game.max_score)
                if "uuid" in game.metadata:
                    game_id = game.metadata["uuid"].split("-")[-1]
                    game_ids.append(game_id)
                    game_names.append(game_id)
                    game_max_scores.append(game.max_score)
                    if len(goal_graphs):
                        game_goal_graphs[b] = goal_graphs[game_id]
                    if len(manual_world_graphs):
                        game_manual_world_graph[b] = manual_world_graphs[game_id]

            if not game_ids:
                game_ids = range(num_games, num_games + batch_size)
                game_names.extend(game_ids)

            commands = ["restart"]*len(obs)
            scored_commands = [[] for b in range(batch_size)]
            last_scores = [0.0]*len(obs)
            scores = [0.0]*len(obs)
            dones = [False]*len(obs)
            nb_moves = [0]*len(obs)
            infos["goal_graph"] = game_goal_graphs
            infos["manual_world_graph"] = game_manual_world_graph
            agent.reset_parameters(opt.batch_size)
            for step_no in range(opt.max_step_per_episode):
                nb_moves = [step + int(not done) for step, done in zip(nb_moves, dones)]

                if agent.graph_emb_type and ('local' in agent.graph_type or 'world' in agent.graph_type):
                    agent.update_current_graph(obs, commands, scored_commands, infos, opt.graph_mode)

                commands = agent.act(obs, scores, dones, infos, scored_commands, random_action)
                obs, scores, dones, infos = env.step(commands)
                infos["goal_graph"] = game_goal_graphs
                infos["manual_world_graph"] = game_manual_world_graph

                for b in range(batch_size):
                    if scores[b] - last_scores[b] > 0:
                        last_scores[b] = scores[b]
                        scored_commands[b].append(commands[b])

                if all(dones):
                    break
                if step_no == opt.max_step_per_episode - 1:
                    dones = [True for _ in dones]
            agent.act(obs, scores, dones, infos, scored_commands, random_action)  # Let the agent know the game is done.

            if opt.verbose:
                print(".", end="")
            avg_eps_moves.extend(nb_moves)
            avg_eps_scores.extend(scores)
            avg_eps_norm_scores.extend([score/max_score for score, max_score in zip(scores, max_scores)])
        if opt.verbose:
            print("*", end="")
        agent.end_episode()
        game_identifiers.append(game_names)
        avg_moves.append(avg_eps_moves) # episode x # games
        avg_scores.append(avg_eps_scores)
        avg_norm_scores.append(avg_eps_norm_scores)
        max_poss_scores.append(game_max_scores)
    env.close()
    game_identifiers = np.array(game_identifiers)
    avg_moves = np.array(avg_moves)
    avg_scores = np.array(avg_scores)
    avg_norm_scores = np.array(avg_norm_scores)
    max_poss_scores = np.array(max_poss_scores)
    if opt.verbose:
        idx = np.apply_along_axis(np.argsort, axis=1, arr=game_identifiers)
        game_avg_moves = np.mean(np.array(list(map(lambda x, y: y[x], idx, avg_moves))), axis=0)
        game_norm_scores = np.mean(np.array(list(map(lambda x, y: y[x], idx, avg_norm_scores))), axis=0)
        game_avg_scores = np.mean(np.array(list(map(lambda x, y: y[x], idx, avg_scores))), axis=0)

        msg = "\nGame Stats:\n-----------\n" + "\n".join(
            "  Game_#{} = Score: {:5.2f} Norm_Score: {:5.2f} Moves: {:5.2f}/{}".format(game_no,avg_score,
                                                                                            norm_score, avg_move,
                                                                                            opt.max_step_per_episode)
            for game_no, (norm_score, avg_score, avg_move) in
            enumerate(zip(game_norm_scores, game_avg_scores, game_avg_moves)))

        print(msg)

        total_avg_moves = np.mean(game_avg_moves)
        total_avg_scores = np.mean(game_avg_scores)
        total_norm_scores = np.mean(game_norm_scores)
        msg = opt.mode+" stats: avg. score: {:4.2f}; norm. avg. score: {:4.2f}; avg. steps: {:5.2f}; \n"
        print(msg.format(total_avg_scores, total_norm_scores,total_avg_moves))

        ## Dump log files ......
        str_result = {opt.mode + 'game_ids': game_identifiers, opt.mode + 'max_scores': max_poss_scores,
                      opt.mode + 'scores_runs': avg_scores, opt.mode + 'norm_score_runs': avg_norm_scores,
                      opt.mode + 'moves_runs': avg_moves}

        results_ofile = getUniqueFileHandler(opt.results_filename + '_' + opt.mode + '_results')
        pickle.dump(str_result, results_ofile)
    return avg_scores, avg_norm_scores, avg_moves
Example #10
    new_state = queryStateAction(state_str, action)
    return json.loads(new_state)


if __name__ == "__main__":
    #import mcts_agent as agent
    import agent
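    # Line-based protocol over stdin/stdout: the first line carries the action
    # map used to initialise the agent; every later line is a state, answered
    # with a JSON-encoded action.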
    i = 0
    line = "start"
    while (line != ""):
        #print(str(line) + " " +  str(i), file=sys.stderr)
        line = sys.stdin.readline()
        #print line, i
        if i == 0:
            #print(str(line) + " " +  str(i), file=sys.stderr)

            action_map = json.loads(line)
            agent.initialise(action_map)
            sys.stdout.write("INIT_DONE" + os.linesep);
            sys.stdout.flush()

        elif (line != ""):
            state_map = json.loads(line)
            action = agent.act(state_map)
            action_map = {}
            action_map["action"] = action
            sys.stdout.write("0" + json.dumps(action_map) + os.linesep);
            sys.stdout.flush()
            #print(str(action_map), file=sys.stderr)

        i += 1
Example #12
    def run_episode(self, agent):
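        # Run one episode, adding hand-tuned shaping rewards on top of the gain
        # and money signals returned by the environment, and log the totals.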
        self.reward = 0
        s = self.env.reset()
        done = False
        step = 0
        r = 0
        actions = np.zeros(5)
        while not done:
            step += 1
            a = agent.act(s)
            actions[a] += 1
            # Shaping reward that depends only on the chosen action (5 discrete actions).
            r += (-1, 5, 5, 1, 1)[a]

            index, s_, price, gain, terminal, money = self.env.step(a)

            gain = gain if not terminal else 0

            if terminal:
                r -= 4000
                print("step: " + str(step) + " money: " +str(money), " rewards: " + str(r), " action", actions)
                self.steps.append(step)
                self.wallet.append(money)
                self.rewards.append(r)
                done = True

            elif step > 3300:
                if money > 3000:
                    r += 5000
                print("step: " + str(step) + " money: " + str(money)," rewards:"+ str(r), " action", actions)
                self.steps.append(step)
                self.wallet.append(money)
                self.rewards.append(r)
                done = True

            if gain > 0:
                r += 200
            if money > 3000:
                r += 15
            r += 1
            agent.learn((s, a, s_, r, terminal))
            self.reward += r
            s = s_

        self.episode_count += 1
        self.reward_buffer.append(self.reward)
        average = sum(self.reward_buffer) / len(self.reward_buffer)

        print("Episode Nr. {} \nScore: {} \nAverage: {}".format(
            self.episode_count, self.reward, average))