Code Example #1
    def Start(self):

        filePath = self.Config.FilePath.BestModel

        cnf = "BestLog/BestLog180126082522.cnf"
        wgt = "BestLog/BestLog180126082522.wgt"
        timeLimit = 0.9

        net = Network()
        net.Load(cnf, wgt)
        net.TimeLimit = timeLimit

        model = Model()
        taskName = "TaskEval/EvalTask114.task"
        task = MujocoTask(model, taskName)
        #task = MujocoTask.LoadRandom(model, self.Config.Task.EvalDir)
        env = MujocoEnv(model)

        agentConfig = self.Config.ViewerAgent
        agent = Agent(agentConfig, net, model, task)

        bestAction = agent.SearchBestAction()

        while True:

            env.SetSimState(task.StartState)

            for action in bestAction:

                env.Step(action)

                #print(env.GetObservation(task))
                env.Render()
Code Example #2
    def init_environment(self):
        """ 環境を初期化 """
        self.graph = nx.barabasi_albert_graph(n=self.agent_num, m=self.attach)
        for node in self.graph.nodes(data=True):
            idx, data = node
            age = self.get_agent_age()
            data["agent"] = Agent(
                agent_setting=self.agent_setting,
                id=idx,
                age=age,
                hometown=self.name,
                status=Status.SUSCEPTABLE,
                infection_model=self.infection_model,
            )

        # Determine the civil servants
        cs_num = math.ceil(self.agent_num *
                           self.economy_setting["civil_servants_rate"])
        civil_servants = random.sample(list(self.graph.nodes(data=True)), cs_num)
        for _, data in civil_servants:
            data["agent"].is_civil_servant = True

        # Determine the initially infected agents
        init_infected = random.sample(list(self.graph.nodes(data=True)),
                                      self.init_infection)
        for _, data in init_infected:
            data["agent"].status = Status.INFECTED

        # Initialize the economic parameters
        self.finance = self.economy_setting["init_gdp"]
        self.tax_rate = self.economy_setting["tax_rate"]
        self.tmp_tax_revenue = 0

        logger.info('Initialized environment "{}". Population: {}, initial infections: {}'.format(
            self.name.upper(), self.agent_num, self.init_infection))
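Since every node stores its Agent under data["agent"], later simulation steps can be written as plain iterations over the graph. The helper below is a hypothetical sketch (not part of the project) that counts infected agents using only the attributes shown above (the agent's status field and Status.INFECTED):

def count_infected(graph) -> int:
    # Hypothetical helper: walk the networkx graph built in init_environment
    # and count agents whose status is INFECTED.
    return sum(
        1 for _, data in graph.nodes(data=True)
        if data["agent"].status == Status.INFECTED
    )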
Code Example #3
    def test_is_in_range_for_fourth_percent_out_range(self):
        a = Agent("1", "",0.7, 0.5, 0.9, 0.2)
        assert( a.is_in_range(3, 0.4)  == False)

    #--------------------------------------------------------------------------
    
    
           
Code Example #4
    def inflow(self, inflow_agent: Agent, stay_period: int):
        """ 外部環境からのエージェント流入処理 """
        # 流入者の情報を書き換え
        inflow_agent.current_location = self.name
        inflow_agent.stay_period = stay_period

        # Skip creating a new node if this agent has flowed in before
        if inflow_agent.code in self.code_list:
            return

        # Add a new node if this agent has never flowed in before
        self._add_new_node(inflow_agent)
Code Example #5
File: Check.py  Project: nikollson/AIAnimation
    def CalcScore(self, net, filePath):

        bestModel = MujocoModelHumanoid()
        bestTask = MujocoTask(bestModel, filePath)
        bestEnv = MujocoEnv(bestModel)

        bestAgent = Agent(self.Config.CheckerAgent, net, bestModel, bestTask)

        bestAction = bestAgent.SearchBestAction()

        bestScore = self.GetScore(bestEnv, bestTask, bestAction)

        return bestScore
Code Example #6
def test_agents_play_a_game():
    agent1 = Agent("1", get_random_name(), 0, 0, 0, 0)
    agent2 = Agent("2", get_random_name(), 0, 0, 0, 0)
    players = [agent1, agent2]
    game = Connect4('cls')
    game.level = 4
    game.default()
    game.game_mode = game.game_modes[0]
    [agent1, agent2] = game.logic_play(players=players)
    assert (
        (agent1.record[agent1.WINS] == 1 or agent1.record[agent1.DRAWS] == 1
         or agent1.record[agent1.LOSSES] == 1) and
        (agent2.record[agent2.WINS] == 1 or agent2.record[agent2.DRAWS] == 1
         or agent2.record[agent2.LOSSES] == 1))
Code Example #7
    def c_vs_c(self, player_type1, player_type2):
        """
        input: player_type1 and player_type2 are strings
        function: sets the game up to play computer vs. computer
        output: a list with two computer agents
        """
        players = list()
        P1name = self.view.input_option("Enter " + player_type1 + "'s name: ")
        p = self.get_computer_parameters()
        players.append(Agent('1', P1name, p[0], p[1], p[2], p[3]))
        P2name = self.view.input_option("Enter " + player_type2 + "'s name: ")
        p = self.get_computer_parameters()
        players.append(Agent('2', P2name, p[0], p[1], p[2], p[3]))
        return players
Code Example #8
File: trainNewAgent.py  Project: brianbob12/Robot_Gym
def main(args):
    oldAgentDir=""
    experianceDir=""
    newAgentDir=""
    trainingIterations=""

    try:
        options, arguments = getopt.getopt(args, "hi:o:e:l:")
    except getopt.GetoptError:
        print("trainNewAgent.py i- <inputDirecory> -o <outputDirectory> -e <experianceDirectory> -l <trainingIterations>")
        sys.exit(2)
    for opt, arg in options:
        if opt =="-h":
            print("trainNewAgent.py -i <inputDirecory> -o <outputDirectory> -e <experianceDirectory> -l <trainingIterations>")
            sys.exit()
        elif opt=="-i":
            oldAgentDir=arg
        elif opt=="-o":
            newAgentDir=arg
        elif opt=="-e":
            experianceDir=arg
        elif opt=="-l":
            trainingIterations=arg

    #main code
    pathList=os.path.abspath("").split("\\")
    pathList=pathList[:-2]
    path=""
    for pathL in pathList:
        path+=pathL+"\\"
    path=path[:-1]

    agent=Agent(path+"/gameData/agentConfig.json")
    agent.importAgent(path+"/"+oldAgentDir)
    agent.loadTrainingData(path+"/"+experianceDir)

    iterations = 1
    try:
        iterations = int(trainingIterations)
    except ValueError:
        pass

    for i in range(iterations):
        print("Training:",i)
        agent.trainAgent()
    #export agent
    agent.export(path+"/"+newAgentDir)
    exit(1)
Code Example #9
File: SelfPlay.py  Project: nikollson/AIAnimation
    def Start(self):

        filePath = self.Config.FilePath.NextGeneration

        net = Network()
        net.Load(filePath.Config, filePath.Weight)

        model = Model()
        task = MujocoTask(model, self.GetRandomFile())
        env = MujocoEnv(model)

        agentConfig = self.Config.SelfPlayAgent
        agent = Agent(agentConfig, net, model, task)

        bestAction = agent.SearchBestAction()
        print(bestAction)

        agent.SaveTrainData(self.Config.GetTrainPath())
Code Example #10
def test_reproduce():
    trainer = GeneticTrainer(10, 100)
    parent1 = Agent("1", get_random_name(), 15, 30, 60, 90)
    parent2 = Agent("2", get_random_name(), 5, 20, 50, 80)
    child1, child2 = trainer.reproduce(parent1, parent2)

    parent1_params, parent2_params = [15, 30, 60, 90], [5, 20, 50, 80]
    child1_params = [
        child1.percent_first_move, child1.percent_second_move,
        child1.percent_third_move, child1.percent_fourth_move
    ]
    child2_params = [
        child2.percent_first_move,
        child2.percent_second_move,
        child2.percent_third_move,
        child2.percent_fourth_move,
    ]

    assert (parent1_params != child1_params
            and parent2_params != child2_params)
Code Example #11
    def reproduce(self, parent1: Agent, parent2: Agent) -> [Agent, Agent]:
        """
        :param parent1: an Agent to generate a couple of children with another Agent
        :param parent2: an Agent to generate a couple of children with another Agent
        :return: a list of 2 children (new Agents) with a new combination of the parents' (Agents') genes (params)
        """
        random.seed()

        parent1_params = parent1.percent_first_move, \
                         parent1.percent_second_move, \
                         parent1.percent_third_move, \
                         parent1.percent_fourth_move

        parent2_params = parent2.percent_first_move, \
                         parent2.percent_second_move, \
                         parent2.percent_third_move, \
                         parent2.percent_fourth_move

        index = random.randint(1, 3)
        child1_params = parent1_params[:index] + parent2_params[index:]
        child2_params = parent2_params[:index] + parent1_params[index:]
        child1 = Agent("1", get_random_name(), 0, 0, 0, 0)
        child2 = Agent("2", get_random_name(), 0, 0, 0, 0)
        child1.percent_first_move, \
        child1.percent_second_move, \
        child1.percent_third_move, \
        child1.percent_fourth_move = child1_params

        child2.percent_first_move, \
        child2.percent_second_move, \
        child2.percent_third_move, \
        child2.percent_fourth_move = child2_params
        return [child1, child2]
Code Example #12
def test_mutate():
    trainer = GeneticTrainer(10, 100)
    individual = Agent("1", get_random_name(), 0.14, 0.33, 0.67, 0.96)
    original_params = [
        individual.percent_first_move, individual.percent_second_move,
        individual.percent_third_move, individual.percent_fourth_move
    ]
    mutated = trainer.mutate(individual, 100)
    mutated_params = [
        mutated.percent_first_move, mutated.percent_second_move,
        mutated.percent_third_move, mutated.percent_fourth_move
    ]

    assert (original_params != mutated_params)
Code Example #13
    def generate_population(self, population_size) -> list:
        """
        :param population_size: number of individuals to create for the new population
        :return: the randomly generated population, so it can be used directly if required; it is also kept in the
        local attribute self.population
        """
        for i in range(0, population_size):
            random.seed()
            first_percent = round(random.uniform(0, 0.25), 2)
            second_percent = round(random.uniform(first_percent, 0.5), 2)
            third_percent = round(random.uniform(second_percent, 0.75), 2)
            fourth_percent = round(random.uniform(third_percent, 1), 2)
            self.population.append(
                Agent("1", get_random_name(), first_percent, second_percent,
                      third_percent, fourth_percent))
        return self.population
Code Example #14
    def train(self, individual: Agent, opponents: list):
        '''
        :param individual: an Agent that will play against a list of Agents (opponents) in order to check the number of
        wins it gets
        :param opponents: a list of Agents that will each play, one by one, against the individual to be trained
        :return: the same individual (Agent) from the input, but 'trained'
        '''

        individual.character = "1"
        for opponent in opponents:
            opponent.character = "2"
            players = [individual, opponent]
            game = Connect4('cls')
            game.default()
            game.game_mode = game.game_modes[0]
            [individual, _] = game.logic_play(players=players)
        return individual
Code Example #15
def test_make_pairs():
    agent1 = Agent("1", get_random_name(), 0, 0, 0, 0)
    agent2 = Agent("2", get_random_name(), 0, 0, 0, 0)
    agent3 = Agent("1", get_random_name(), 0, 0, 0, 0)
    agent4 = Agent("2", get_random_name(), 0, 0, 0, 0)
    agent5 = Agent("1", get_random_name(), 0, 0, 0, 0)
    agent6 = Agent("2", get_random_name(), 0, 0, 0, 0)
    agents = [agent1, agent2, agent3, agent4, agent5, agent6]
    couples_in_a_row = []
    for agent01, agent02 in make_pairs(agents):
        couples_in_a_row.append(agent01)
        couples_in_a_row.append(agent02)
    assert (agents == couples_in_a_row)
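Taken together, the GeneticTrainer examples (generate_population, train, reproduce, mutate) and make_pairs suggest a full evolution loop. The driver below is a hedged sketch assembled only from the calls shown above; the generation count, the pairing of the sorted population, and the meaning of the second mutate argument (assumed to be a mutation chance) are assumptions, and GeneticTrainer, Agent, and make_pairs are assumed to be importable from the project.

def evolve(generations=10):
    # Hypothetical driver loop; every call mirrors one of the examples above.
    trainer = GeneticTrainer(10, 100)
    population = trainer.generate_population(10)

    for _ in range(generations):
        # Let each individual play against all the others to build up its record.
        for individual in population:
            opponents = [a for a in population if a is not individual]
            trainer.train(individual, opponents)

        # Sort by wins (record/WINS layout taken from test_agents_play_a_game).
        population.sort(key=lambda a: a.record[a.WINS], reverse=True)

        # Pair the sorted population, breed, and mutate the offspring.
        next_generation = []
        for parent1, parent2 in make_pairs(population):
            child1, child2 = trainer.reproduce(parent1, parent2)
            next_generation.append(trainer.mutate(child1, 100))
            next_generation.append(trainer.mutate(child2, 100))
        population = next_generation

    return population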
Code Example #16
File: Evaluater.py  Project: nikollson/AIAnimation
    def CalcScores(self, best, next, filePath):

        bestModel = MujocoModelHumanoid()
        bestTask = MujocoTask(bestModel, filePath)
        bestEnv = MujocoEnv(bestModel)


        nextModel = MujocoModelHumanoid()
        nextTask = MujocoTask(nextModel, filePath)
        nextEnv = MujocoEnv(nextModel)

        bestAgent = Agent(self.Config.EvaluateAgent, best, bestModel, bestTask)
        nextAgent = Agent(self.Config.EvaluateAgent, next, nextModel, nextTask)

        bestAction = bestAgent.SearchBestAction()
        nextAction = nextAgent.SearchBestAction()

        bestScore = self.GetScore(bestEnv, bestTask, bestAction)
        nextScore = self.GetScore(nextEnv, nextTask, nextAction)

        #nextAgent.SaveTrainData(self.Config.GetTrainPath("next"))

        return bestScore, nextScore
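CalcScores evaluates the current best network and the next-generation network on the same task file and returns both scores; a caller presumably compares them to decide whether to promote the new network. The check below is a hedged sketch of such a comparison, not the project's actual Evaluater logic; the function name, the aggregation over several task files, and the strict-improvement criterion are all assumptions.

def is_next_better(evaluater, best_net, next_net, task_files):
    # Hypothetical promotion check built on CalcScores above.
    best_total, next_total = 0.0, 0.0
    for task_file in task_files:
        best_score, next_score = evaluater.CalcScores(best_net, next_net, task_file)
        best_total += best_score
        next_total += next_score
    return next_total > best_total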
Code Example #17
    def test_throw_die_26_57(self):
        a = Agent("1", "", 0.98, 0.7, 0.5, 0.1)
        die = a.throw_die(0.26, 0.57)
        assert 0.26 <= die <= 0.57
Code Example #18
    def test_is_in_range_for_first_percent_between_range(self):
        a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
        assert a.is_in_range(0, 0.63)
Code Example #19
    def test_is_in_range_for_first_percent_out_range(self):
        a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
        assert not a.is_in_range(0, 0.75)
Code Example #20
    def test_is_in_range_for_second_percent_limit2(self):
        a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
        assert a.is_in_range(1, 0.5)
Code Example #21
    def test_is_in_range_for_second_percent_between_range(self):
        a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
        assert a.is_in_range(1, 0.30)
Code Example #22
    def test_is_in_range_for_third_percent_limit2(self):
        a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
        assert a.is_in_range(2, 0.9)
Code Example #23
    def test_create_ranges_disordered(self):
        a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
        dic = a.create_ranges(0.7, 0.5, 0.9, 0.2)
        assert dic == {3: [0, 0.2], 1: [0.21, 0.5], 0: [0.51, 0.7], 2: [0.71, 0.9]}
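The expected dictionary in this test shows how create_ranges lays out the move ranges: the four percentages are sorted in ascending order and stacked into adjacent intervals, each keyed by the original move index. The function below is a hypothetical reconstruction of that behaviour for illustration; the project's actual Agent.create_ranges may differ in details such as rounding.

def create_ranges(first, second, third, fourth):
    # Sort the percentages ascending, remembering each one's original move index,
    # and stack them into adjacent [lower, upper] intervals.
    ranges = {}
    lower = 0.0
    for index, percent in sorted(enumerate([first, second, third, fourth]),
                                 key=lambda item: item[1]):
        ranges[index] = [round(lower, 2), percent]
        lower = percent + 0.01
    return ranges

# create_ranges(0.7, 0.5, 0.9, 0.2)
# -> {3: [0.0, 0.2], 1: [0.21, 0.5], 0: [0.51, 0.7], 2: [0.71, 0.9]}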
Code Example #24
    def test_is_in_range_for_third_percent_between_range(self):
        a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
        assert a.is_in_range(2, 0.85)
Code Example #25
from Agent.Agent import Agent
from Network.Exceptions import *
import os.path

pathList = os.path.abspath("").split("\\")
pathList = pathList[:-2]
path = ""
for pathL in pathList:
    path += pathL + "\\"
path = path[:-1]

agent = Agent(path + "/gameData/agentConfig.json")
agent.createNewAgent()
agent.export(path + "/playData/agents/a0B")
Code Example #26
    def test_is_in_range_for_third_percent_out_range(self):
        a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
        assert not a.is_in_range(2, 0.5)
Code Example #27
    def test_create_ranges_set_biggest_percent(self):
        a = Agent("1", "", 0.98, 0.7, 0.5, 0.1)
        dic = a.create_ranges(0.88, 0.7, 0.5, 0.1)
        assert a.biggest_percent == 0.88
Code Example #28
    def compute_path(self):
        count = 0
        from Agent.Agent import Agent
        self.__agents = []
        for drone in program.get_drones():  # Create Reinforcement Learning Agents
            self.__agents.append(
                Agent(drone.get_name(), count, drone.get_battery_time(),
                      drone.get_speed(),
                      program.compute_minimum_area(self.__drones), (0, 0), self.__original_environment))

            count += 1

        # Get number of observation episodes
        number_episodes = Config.SIMULATIONS

        import time
        global_execution_start_time = time.time()
        start_number = 0
        done_count = 0  # Number of times problem has been solved

        # Get epsilon
        epsilon = Config.EPSILON

        # Save epsilon for plotting
        epsilons = [epsilon]

        # Total repetitions in all episodes
        total_unchanged_environment_episodes_count = 0

        # Maximum coverage overall
        max_coverage = 0.0

        # Max coverage lists for plotting for the whole experiment
        max_coverages = []

        # Simulations' times
        episodes_time = []

        # Simulations' total rewards
        rewards_episodes = []

        # Store total actions taken per observation
        episode_total_actions = []
        episode_total_valid_actions = []

        valid_actions_taken_agent = []

        # Compute episodes
        for episode_number in range(start_number, number_episodes):

            # Reset agents and environment
            program.reset()

            # Update heatmap
            heatmap = self.get_environment() * 0.0
            for element in self.__agents:
                (x, y) = element.get_position()
                heatmap[x][y] += 1.0

            # Add minimum max coverage
            max_coverages.append(0.0)

            # Add max coverage observation
            coverages_episode = [0.0]

            # Reset unchanged environments count
            unchanged_environment_episodes_count = 0

            # Create ANN if necessary
            if (Config.GLOBAL_MODEL):
                from numpy import dstack
                input_matrix = dstack((self.get_environment(), self.get_environment(), self.get_environment()))
                from Model.Model import create_model
                model = create_model(input_matrix.shape)

            # Get initial environment for starting observation
            actual_environment = program.get_environment()

            # Get visited positions map and agent position map
            import numpy as np
            actual_visited_map = np.array(actual_environment * 0.0, dtype=bool)  # Changed to bool for first experiments
            drone_map = np.array(actual_environment * 0.0, dtype=bool)  # Changed to bool for first experiments

            # Rewards and for plotting
            rewards_episodes.append(0.0)
            rewards = []
            action_rewards = []
            for _ in self.__agents:
                rewards.append([0])
                action_rewards.append([0])

            # Mark agents positions as true
            for agent in self.__agents:
                (i, j) = agent.get_position()
                drone_map[i, j] = True
                actual_visited_map[i, j] = True

            # Print trace every 100 episodes
            if episode_number % Config.SIMULATIONS_CHECKPOINT == 0 and Config.PRINT_SIMULATIONS:
                print("Episode {} of {}".format(episode_number + 1, number_episodes))

            # Compute paths
            done = False
            episode_counter = 0
            visited_list = []  # store each agent's visited squares
            visited_list.append(actual_visited_map)  # store each agent's visited squares

            # Add new values to actions lists
            episode_total_actions.append(0.0)
            episode_total_valid_actions.append(0.0)

            if len(valid_actions_taken_agent):
                for element in self.get_agents():
                    valid_actions_taken_agent[element.get_number()].append(0.0)
            else:
                for _ in self.get_agents():
                    valid_actions_taken_agent.append([0.0])

            # Store trendline_slope
            trendline_slope = -1.0

            import time
            start_time = time.time()
            while not done:

                # Get previous environment (this way all agents would act at the same time)
                prev_visited_map = np.array(np.ceil(np.sum(visited_list, axis=0)), dtype=bool).copy()
                prev_drone_map = drone_map.copy()
                drone_position_list = []  # store each agent's position

                # For each agent compute 1 action
                for agent in program.get_agents():

                    # Make decision
                    import numpy as np
                    rand_number = np.random.random()

                    if rand_number < epsilon:
                        random_action = True
                        # Get random action
                        chosen_action = np.random.randint(0, len(Config.ACTIONS_DICT.keys()))
                    else:
                        random_action = False
                        # Decide one action
                        if not Config.GLOBAL_MODEL:
                            chosen_action = np.argmax(agent.predict(np.array(prev_visited_map, dtype=int),
                                                                    np.array(prev_drone_map, dtype=int),
                                                                    self.get_environment(), ))
                        else:
                            chosen_action = np.argmax(agent.predict_global_model(np.array(prev_visited_map, dtype=int),
                                                                                 np.array(prev_drone_map, dtype=int),
                                                                                 self.get_environment(),
                                                                                 model))

                    episode_total_actions[episode_number] += 1.0

                    # Get agent's position before doing action for printing it in a file
                    prev_position = agent.get_position()

                    # Update environment according to action
                    actual_visited_map, actual_drone_map, reward = agent.do_action(chosen_action,
                                                                                   self.__original_environment,
                                                                                   prev_visited_map, prev_drone_map)

                    (r, c) = agent.get_position()
                    heatmap[r][c] += 1.0

                    # Plot heatmap
                    import matplotlib
                    matplotlib.use('Agg')  # For running in SO without graphical environment
                    import matplotlib.pyplot as plt
                    plt.plot(rewards[agent.get_number()])
                    fig, ax = plt.subplots()
                    im = ax.imshow(heatmap)
                    for r in range(Config.ENVIRONMENT_ROWS):
                        for c in range(Config.ENVIRONMENT_COLUMNS):
                            text = ax.text(c, r, heatmap[r, c], ha="center", va="center", color="w")
                    fig.tight_layout()
                    plt.savefig('heatmap_episode_' + str(episode_number) + '.png')
                    plt.clf()

                    # Plot agent's reward graph
                    from numpy import sum
                    rewards[agent.get_number()].append(sum(rewards[agent.get_number()]) + agent.get_reward())
                    action_rewards[agent.get_number()].append(agent.get_reward())
                    rewards_episodes[episode_number] += agent.get_reward()
                    import matplotlib
                    matplotlib.use('Agg')  # For running in SO without graphical environment
                    import matplotlib.pyplot as plt
                    plt.plot(rewards[agent.get_number()])
                    plt.savefig('total_reward_evolution_drone_' + str(agent.get_number()) + '.png')
                    plt.clf()
                    plt.plot(action_rewards[agent.get_number()])
                    plt.savefig('action_reward_evolution_drone_' + str(agent.get_number()) + '.png')
                    plt.clf()

                    if (prev_visited_map != actual_visited_map).any():
                        agent.increase_valid_taken_actions()
                        episode_total_valid_actions[episode_number] += 1.0

                    # Store the number of times in a row that the environment does not change
                    if (prev_visited_map == actual_visited_map).all():
                        unchanged_environment_episodes_count += 1
                    else:
                        unchanged_environment_episodes_count = 0

                    # Save taken action in a file
                    with open(
                            Config.BASE_ROUTE + 'actions_' + str(agent.get_number()) + '_' + agent.get_name() + '.csv',
                            'a+') as f:
                        if not episode_counter:
                            agent.set_status('flying')
                            f.write(
                                'action_code, action_name, prev_position, actual_position, valid, visited, random_action, environment_shape, actions_taken, valid_taken_actions, unchanged_episodes\n')
                        f.write(str(chosen_action) + ', ' + Config.ACTIONS_DICT[chosen_action] + ', ' + str(
                            prev_position) + ', ' + str(agent.get_position()) + ', ' + str(
                            prev_position != agent.get_position())
                                + ', ' + str((prev_position != agent.get_position()) and
                                             (prev_visited_map[agent.get_position()[0], agent.get_position()[1]]))
                                + ', ' + str(random_action)
                                + ', ' + str(self.__original_environment.shape) + ', ' + str(agent.get_actions_taken())
                                + ', ' + str(agent.get_valid_taken_actions()) + ', ' + str(
                            unchanged_environment_episodes_count) + '\n')

                    # Memorize new memory observation
                    observation = (
                    prev_visited_map, actual_visited_map, prev_drone_map, actual_drone_map, chosen_action,
                    reward, agent.get_status())
                    agent.memorize(observation)

                    # Save agent results for merging with the remaining agents
                    visited_list.append(actual_visited_map + (1.0 - self.get_environment()))
                    import matplotlib
                    matplotlib.use('Agg')  # For running in SO without graphical environment
                    import matplotlib.pyplot as plt
                    plt.imshow(np.array(np.ceil(np.sum(visited_list, axis=0)), dtype=bool), cmap='Greys',
                               interpolation='nearest')
                    plt.savefig(Config.BASE_ROUTE + 'combined_visited_list.png')
                    plt.clf()

                    drone_position_list.append(actual_drone_map)

                    # Train
                    if not Config.GLOBAL_MODEL:
                        agent_history = agent.learn(self.get_environment())
                        agent.get_model().save(str(agent.get_number()) + '_local_model.h5')
                    else:
                        agent_history = agent.learn_global_model(self.get_environment(), model)
                        model.save('global_model.h5')

                    # Check experiment stopping
                    waiting_hours = float(time.time() - start_time) / 60.0 / 60.0  # Convert seconds to hours

                    import numpy as np
                    borders_matrix = 1.0 - np.ceil(self.get_environment())
                    visited_matrix = np.array(np.ceil(np.sum(visited_list, axis=0)), dtype=float)
                    visited_matrix = np.where(visited_matrix >= 1.0, 1.0, visited_matrix)
                    only_visited_cells_matrix = visited_matrix - borders_matrix

                    visited_cells_count = float(np.count_nonzero(only_visited_cells_matrix == 1.0))
                    visitable_cells_count = float(np.count_nonzero(self.get_environment() == 1.0))
                    coverage = visited_cells_count / visitable_cells_count

                    max_coverage = max(coverage, max_coverage)
                    max_coverages[episode_number] = max(coverage, max_coverages[episode_number])
                    coverages_episode.append(coverage)

                    valid_actions_taken_agent[agent.get_number()][episode_number] = agent.get_valid_taken_actions()

                    if unchanged_environment_episodes_count >= Config.MAXIMUM_UNCHANGED_ENVIRONMENT_EPISODES:
                        total_unchanged_environment_episodes_count += unchanged_environment_episodes_count
                        done = True
                        break
                    elif waiting_hours >= Config.MAXIMUM_WAIT_HOURS and coverage < Config.COMPLETENESS_COVERAGE:
                        total_unchanged_environment_episodes_count += unchanged_environment_episodes_count
                        done = True
                        break

                    # Check if agent had finished
                    if False not in np.array(np.ceil(np.sum(visited_list, axis=0)), dtype=bool):
                        with open(Config.BASE_ROUTE + 'solution_times.txt', 'a+') as f:
                            f.write('solution time ' + str(done_count) + ': '
                                    + time.strftime('%H:%M:%S', time.gmtime(time.time() - start_time))
                                    + ' epsilon: ' + str(epsilon)
                                    + '\n')
                        done_count += 1
                        done = True
                        break

                episode_counter += 1

                # Combine agents results
                drone_map = np.array(np.sum(drone_position_list, axis=0), dtype=bool)

            # Plot coverages for each observation graph
            if len(coverages_episode) > 1:
                import matplotlib
                matplotlib.use('Agg')  # For running in SO without graphical environment
                import matplotlib.pyplot as plt
                ax = plt.figure().gca()
                ax.set_ylim([0.0, 1.0])
                x = list(range(len(coverages_episode)))
                y = coverages_episode
                from numpy import polyfit
                fit = polyfit(x, y, 1)
                yfit = [n * fit[0] for n in x] + fit[1]
                ax.plot(x, y)
                ax.plot(yfit, 'r--')
                plt.savefig('coverages_episode_' + str(episode_number) + '.png')
                plt.clf()

            # Store and plot observation's time
            episodes_time.append((time.time() - start_time) / 3600.0)
            import numpy as np
            average_episode_time = np.average(episodes_time)
            import matplotlib
            matplotlib.use('Agg')  # For running in SO without graphical environment
            import matplotlib.pyplot as plt
            ax = plt.figure().gca()
            ax.plot(episodes_time)
            from matplotlib.ticker import MaxNLocator
            ax.xaxis.set_major_locator(MaxNLocator(integer=True))
            plt.savefig('episode_time_hours.png')
            plt.clf()


            # Plot valid action percentage per observation graph
            if len(episode_total_valid_actions) > 1:
                import matplotlib
                matplotlib.use('Agg')  # For running in SO without graphical environment
                import matplotlib.pyplot as plt
                import numpy as np
                ax = plt.figure().gca()
                division = np.divide(episode_total_valid_actions, episode_total_actions)
                ax.set_ylim([0.0, 1.0])
                x = list(range(len(division)))
                y = division
                from numpy import polyfit
                fit = polyfit(x, y, 1)
                yfit = [n * fit[0] for n in x] + fit[1]
                ax.plot(x, y)
                ax.plot(yfit, 'r--')
                plt.savefig('actions_percentages_episodes.png')
                plt.clf()

                import matplotlib
                matplotlib.use('Agg')  # For running in SO without graphical environment
                import matplotlib.pyplot as plt
                import numpy as np
                ax = plt.figure().gca()
                ax.set_ylim([0.0, 1.0])
                for element in self.get_agents():
                    division = np.divide(valid_actions_taken_agent[element.get_number()], episode_total_actions)
                    x = list(range(len(division)))
                    y = division
                    ax.plot(x, y)
                plt.savefig('percentage_work_per_agent.png')
                plt.clf()

            # Plot coverages graph
            if len(max_coverages) > 1:
                import matplotlib
                matplotlib.use('Agg')  # For running in SO without graphical environment
                import matplotlib.pyplot as plt
                ax = plt.figure().gca()
                ax.set_ylim(bottom=0.0)
                x = list(range(len(max_coverages)))
                y = max_coverages
                from scipy.stats import linregress
                trend = linregress(x, y)
                trendline_slope = trend.slope  # or fit[0]
                from numpy import polyfit
                fit = polyfit(x, y, 1)
                yfit = [n * fit[0] for n in x] + fit[1]
                ax.plot(x, y)
                ax.plot(yfit, 'r--')
                plt.savefig('coverages.png')
                plt.clf()

            # Plot epsilon graph
            import matplotlib
            matplotlib.use('Agg')  # For running in SO without graphical environment
            import matplotlib.pyplot as plt
            ax = plt.figure().gca()
            ax.plot(epsilons)
            from matplotlib.ticker import MaxNLocator
            ax.xaxis.set_major_locator(MaxNLocator(integer=True))
            plt.savefig('epsilons.png')
            plt.clf()

            # Update epsilon
            # The lower the epsilon, less random actions are taken
            epsilon = max(Config.MIN_EPSILON, epsilon * Config.EPSILON_DECAY)
            epsilons.append(epsilon)
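Buried inside compute_path is a standard epsilon-greedy policy with multiplicative epsilon decay. Stripped of the plotting and bookkeeping, the decision rule reduces to the sketch below; the Config names and the agent.predict signature are taken from the example above, while choose_action itself and the n_actions parameter are just illustrative.

import numpy as np

def choose_action(agent, prev_visited_map, prev_drone_map, environment, epsilon, n_actions):
    # Epsilon-greedy selection: explore with probability epsilon, otherwise
    # pick the action the agent's model scores highest.
    if np.random.random() < epsilon:
        return np.random.randint(0, n_actions)
    return np.argmax(agent.predict(np.array(prev_visited_map, dtype=int),
                                   np.array(prev_drone_map, dtype=int),
                                   environment))

# After every episode the exploration rate decays toward a floor:
#     epsilon = max(Config.MIN_EPSILON, epsilon * Config.EPSILON_DECAY)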
Code Example #29
    def test_is_in_range_for_first_percent_limit2(self):
        a = Agent("1", "", 0.7, 0.5, 0.9, 0.2)
        assert a.is_in_range(0, 0.70)
Code Example #30
    def test_throw_die_0_100(self):
        a = Agent("1", "", 0.98, 0.7, 0.5, 0.1)
        die = a.throw_die(0, 1)
        assert 0 <= die <= 1