def demo(self):
        """
		Function to make a demo of our agents 
		"""

        self.env = None
        self.env = environment(self.model_name, self.vissim_working_directory, self.sim_length,
                               self.Model_dictionnary, self.actions_set,
                               Random_Seed=self.Random_Seed, timesteps_per_second=self.timesteps_per_second,
                               mode='demo', delete_results=True, verbose=True)

        for idx, agent in self.Agents.items():
            agent.reset()
            agent.epsilon = 0  #Set the exploration rate to 0

        start_state = self.env.get_state()

        actions = {}

        # Initialisation
        for idx, s in start_state.items():
            actions[idx] = self.Agents[idx].choose_action(s)

        while not self.env.done:

            SARSDs = self.env.step(actions)

            if self.env.action_required:

                actions = dict()
                for idx, sarsd in SARSDs.items():
                    s, a, r, ns, d = sarsd
                    # The next action must be chosen from "next_state", since that is now the simulator's current state
                    actions[idx] = int(self.Agents[idx].choose_action(ns))

        self.env.Stop_Simulation(delete_results=True)
        self.env = None
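
Setting agent.epsilon = 0 above turns an epsilon-greedy policy purely greedy for the demo. A minimal sketch of such a choose_action, assuming a Keras-style Q-network stored in self.model and an action_size attribute (both hypothetical names, not taken from the original code), could look like:

import numpy as np

def choose_action(self, state):
    # With probability epsilon take a random action, otherwise act greedily
    # on the predicted Q-values; epsilon = 0 therefore disables exploration.
    if np.random.rand() < self.epsilon:
        return np.random.randint(self.action_size)
    q_values = self.model.predict(np.asarray(state)[np.newaxis, :], verbose=0)
    return int(np.argmax(q_values[0]))
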
Example #2
    def demo(self):
        """
        Run a demonstration episode with the trained agents, stepping from action to action.
        """

        self.env = None
        self.env = environment(self.model_name, self.vissim_working_directory, self.sim_length, self.Model_dictionnary,
                               self.actions_set,
                               Random_Seed=self.Random_Seed, timesteps_per_second=self.timesteps_per_second,
                               mode='demo', delete_results=True, verbose=True)

        start_state = self.env.get_state()

        actions = {}
        for idx, s in start_state.items():
            actions[idx] = self.Agents[idx].choose_action(s)
        # Simulation Loop, Run until end of simulation

        t = 0
        while (self.sim_length - 3) > self.env.global_counter:
            # self.Agents[idx].delay.append(self.env.SCUs[0].calculate_delay())
            SARSDs = self.env.step_to_next_action(actions)
            actions = dict()
            for idx, sarsd in SARSDs.items():
                s, a, r, ns, d = sarsd
                # The next action must be chosen from "next_state", since that is now the simulator's current state
                actions[idx] = int(self.Agents[idx].choose_action(ns))
            t += 1
        self.env = None
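
Both loops above assume that step_to_next_action returns a dict keyed by signal-controller id, whose values are (state, action, reward, next_state, done) tuples. A stand-in environment with that interface (hypothetical names, useful for exercising the loop without a running VISSIM instance) might be sketched as:

import numpy as np

class StubEnvironment:
    """Minimal stand-in that mimics the SARSD interface used above."""

    def __init__(self, n_agents=2, state_dim=4, horizon=100):
        self.n_agents, self.state_dim, self.horizon = n_agents, state_dim, horizon
        self.global_counter = 0
        self.done = False

    def get_state(self):
        return {idx: np.zeros(self.state_dim) for idx in range(self.n_agents)}

    def step_to_next_action(self, actions):
        self.global_counter += 1
        self.done = self.global_counter >= self.horizon
        return {idx: (np.zeros(self.state_dim), a, 0.0, np.zeros(self.state_dim), self.done)
                for idx, a in actions.items()}
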
Example #3
    def get_data(self):
        """
        Function to train the agents
        input the number of episode of training

        """
        self.env = None
        self.env = environment(self.model_name, self.vissim_working_directory, self.sim_length, self.Model_dictionnary,
                               self.actions_set,
                               self.Random_Seed, timesteps_per_second=self.timesteps_per_second, mode='training',
                               delete_results=True, verbose=True)

        # Get initial State
        start_state = self.env.get_state()
        print("start")
        # Data-collection loop (one episode)

        # Create dictionary for chosen actions for each agent and fill it
        actions = {}
        for idx, s in start_state.items():
            actions[idx] = self.Agents[idx].choose_action(s)
        # Simulation Loop, Run until end of simulation

        t = 0
        while (self.sim_length - 3) > self.env.global_counter:
            #self.Agents[idx].delay.append(self.env.SCUs[0].calculate_delay())
            SARSDs = self.env.step_to_next_action(actions)
            actions = dict()
            for idx, sarsd in SARSDs.items():
                s, a, r, ns, d = sarsd
                # The next action must be chosen from "next_state", since that is now the simulator's current state
                actions[idx] = int(self.Agents[idx].choose_action(ns))
            t += 1
        self.env = None
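
As written, get_data steps through the episode without storing anything; if the goal is to collect transitions, a small helper along these lines (hypothetical, taking the environment and agents as arguments) would accumulate one list of (s, a, r, ns, d) tuples per agent:

def collect_transitions(env, agents, max_steps):
    # One list of (state, action, reward, next_state, done) tuples per agent id.
    collected = {idx: [] for idx in agents}
    actions = {idx: agents[idx].choose_action(s) for idx, s in env.get_state().items()}
    for _ in range(max_steps):
        sarsds = env.step_to_next_action(actions)
        actions = {}
        for idx, (s, a, r, ns, d) in sarsds.items():
            collected[idx].append((s, a, r, ns, d))
            actions[idx] = int(agents[idx].choose_action(ns))
        if env.done:
            break
    return collected
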
Example #4
    def train(self, number_of_episode, vissim=False):
        """
        Train the agents for the given number of episodes.
        """
        if vissim is not False:
            vissim.Simulation.Stop()

        self.env = None
        self.env = environment(self.model_name,
                               self.vissim_working_directory,
                               self.sim_length,
                               self.Model_dictionnary,
                               actions_set='default_actions',
                               Random_Seed=self.Random_Seed,
                               timesteps_per_second=self.timesteps_per_second,
                               mode='training',
                               delete_results=True,
                               verbose=True,
                               vissim=vissim)

        for idx, agent in self.Agents.items():
            agent.reset()

        start_state = self.env.get_state()

        while self.number_of_episode < number_of_episode:
            actions = {}
            for idx, s in start_state.items():
                actions[idx] = self.Agents[idx].choose_action(s)

            while True:
                SARSDs = self.env.step_to_next_action(actions)

                actions = dict()
                for idx, sarsd in SARSDs.items():
                    s, a, r, ns, d = sarsd

                    #print(sarsd)
                    self.Agents[idx].remember(s, a, r, ns, d)
                    if len(self.Agents[idx].memory) >= self.Agents[idx].n_step_size:
                        self.Agents[idx].learn()

                    # The next action must be chosen from "next_state", since that is now the simulator's current state
                    actions[idx] = int(self.Agents[idx].choose_action(ns))

                # End-of-episode saving and monitoring of the agents
                if self.env.done:
                    self.env.reset()
                    self.Random_Seed += 1
                    self.number_of_episode += 1
                    print('Episode {} is finished'.format(
                        self.number_of_episode))

                    # if (i+1)%reduce_entropy_every == 0:
                    #        if Agents[idx].params['entropy'] >= entropy_threshold :
                    #            Agents[idx].reduce_entropy()
                    #            print ("Agent {} : Entropy reduced to {} " .format(idx, Agents[idx].params['entropy']))

                    # Only for AC
                    for idx, agent in self.Agents.items():
                        predicted_values, true_values, proba0, probas = agent.value_check(
                            self.horizon, self.n_sample)
                        print(
                            "Agent {} : Predicted Values and True Return : \n {} \n {}"
                            .format(idx, predicted_values, true_values))
                        print(
                            "Agent {} : Proba distribution on those states : \n {}"
                            .format(idx, probas))
                        print(
                            "Agent {} : Proba distribution on the 0 state : \n {}"
                            .format(idx, proba0))
                        agent.average_reward = np.mean(agent.episode_reward)
                        agent.reward_storage.append(agent.average_reward)
                        print("Average Reward for Agent {} this episode : {}".
                              format(idx, round(agent.average_reward, 2)))
                        agent.loss.append(agent.losses[2])
                        agent.best_agent(self.vissim_working_directory,
                                         self.model_name, self.Session_ID,
                                         self.Session_ID)
                        agent.reset()

                    if self.number_of_episode % self.save_every == 0:
                        self.save(self.number_of_episode, self.save_location)

                    break
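
The check on len(memory) >= n_step_size above suggests the agent learns from a sliding window of the last n transitions. A generic n-step return for such a window, assuming a discount factor gamma (not shown in the original code), can be computed as:

def n_step_return(transitions, gamma):
    """Discounted return over a window of (s, a, r, ns, d) tuples,
    truncated at the first terminal transition."""
    ret = 0.0
    for k, (_, _, reward, _, done) in enumerate(transitions):
        ret += (gamma ** k) * reward
        if done:
            break
    return ret
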
Example #5
    def test(self, vissim=False):
        """
                Function to test our agents on one episode with all the metrics : queues over time, delay
                Average reward of the agents.
                """
        if vissim is not False:
            vissim.Simulation.Stop()

        self.env = None
        self.env = environment(self.model_name,
                               self.vissim_working_directory,
                               self.sim_length,
                               self.Model_dictionnary,
                               Random_Seed=self.Random_Seed,
                               timesteps_per_second=self.timesteps_per_second,
                               mode='test',
                               actions_set='default_actions',
                               delete_results=True,
                               verbose=True,
                               vissim=vissim)

        # Counter used to change the demand during the test
        demand_counter = 0
        self.env.change_demand(self.env.vehicle_demand[demand_counter])

        # Initialisation of the metrics
        Episode_Queues = {}  # Queues at each junction
        Cumulative_Episode_Delays = {}  # Cumulative delay at each junction
        Cumulative_Episode_stop_Delays = {}  # Cumulative stop delay at each junction

        Cumulative_Totale_network_delay = [0]
        Cumulative_Totale_network_stop_delay = [0]

        queues = self.env.get_queues()
        for idx, junction_queues in queues.items():
            Episode_Queues[idx] = [junction_queues]

        delays = self.env.get_delays()
        for idx, junction_delay in delays.items():
            Cumulative_Episode_Delays[idx] = [junction_delay]

        stop_delays = self.env.get_stop_delays()
        for idx, junction_stop_delay in stop_delays.items():
            Cumulative_Episode_stop_Delays[idx] = [junction_stop_delay]

        for idx, agent in self.Agents.items():
            agent.reset()

        start_state = self.env.get_state()

        actions = {}

        # Initialisation
        for idx, s in start_state.items():
            actions[idx] = self.Agents[idx].choose_action(s)

        while not self.env.done:

            SARSDs = self.env.step(actions)

            queues = self.env.get_queues()
            for idx, junction_queues in queues.items():
                Episode_Queues[idx].append(junction_queues)

            delays = self.env.get_delays()
            for idx, junction_delay in delays.items():
                Cumulative_Episode_Delays[idx].append(
                    Cumulative_Episode_Delays[idx][-1] + junction_delay)

            stop_delays = self.env.get_stop_delays()
            for idx, junction_stop_delay in stop_delays.items():
                Cumulative_Episode_stop_Delays[idx].append(
                    Cumulative_Episode_stop_Delays[idx][-1] +
                    junction_stop_delay)

            Cumulative_Totale_network_delay.append(
                Cumulative_Totale_network_delay[-1] +
                self.env.get_delay_timestep())
            Cumulative_Totale_network_stop_delay.append(
                Cumulative_Totale_network_stop_delay[-1] +
                self.env.get_stop_delay_timestep())

            if self.env.action_required:

                actions = dict()
                for idx, sarsd in SARSDs.items():
                    s, a, r, ns, d = sarsd

                    self.Agents[idx].remember(s, a, r, ns, d)
                    # The next action must be chosen from "next_state", since that is now the simulator's current state
                    actions[idx] = int(self.Agents[idx].choose_action(ns))

            if self.env.global_counter % 360 == 0:
                demand_counter += 1
                self.env.change_demand(self.env.vehicle_demand[demand_counter])

        # Stop the simulation without erasing the database
        self.env.Stop_Simulation(delete_results=False)
        self.env = None
        return (Episode_Queues, Cumulative_Episode_Delays,
                Cumulative_Episode_stop_Delays,
                Cumulative_Totale_network_delay,
                Cumulative_Totale_network_stop_delay)
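
As a usage sketch, the metrics returned by test can be plotted directly; this assumes matplotlib and a manager instance called runner (a hypothetical name) that owns the agents:

import matplotlib.pyplot as plt

queues, delays, stop_delays, net_delay, net_stop_delay = runner.test()

plt.plot(net_delay, label="Cumulative network delay")
plt.plot(net_stop_delay, label="Cumulative network stop delay")
plt.xlabel("Simulation step")
plt.ylabel("Delay")
plt.legend()
plt.show()
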
    def train(self, number_of_episode, vissim=False):
        """
                Function to train the agents
                input the number of episode of training

                """
        if vissim is not False:
            vissim.Simulation.Stop()

        self.env = None
        self.env = environment(self.model_name,
                               self.vissim_working_directory,
                               self.sim_length,
                               self.Model_dictionnary,
                               actions_set='default_actions',
                               Random_Seed=self.Random_Seed,
                               timesteps_per_second=self.timesteps_per_second,
                               mode='train',
                               delete_results=True,
                               verbose=True,
                               vissim=vissim)

        # Reset lists for episode reward and episode memory
        for idx, agent in self.Agents.items():
            agent.reset()

        # Get initial State
        start_state = self.env.get_state()
        print("start")
        demand_counter = 0
        no_of_demands = len(self.env.vehicle_demand) - 1
        # Episodic training loop
        while self.number_of_episode < number_of_episode:
            # Episodic training loop

            # Counter used to select the traffic demand for this episode
            # THIS NEEDS CHANGING NEIL!!!
            demand_counter = np.random.randint(9)

            ## ATTENTION: demand changes are active here (they are commented out in test()).
            self.env.change_demand(self.env.vehicle_demand[demand_counter])
            self.env.change_demand(self.env.vehicle_demand[demand_counter])
            # Create dictionary for chosen actions for each agent and fill it
            actions = {}
            for idx, s in start_state.items():
                actions[idx] = self.Agents[idx].choose_action(s)

            # Simulation Loop, Run until end of simulation
            while True:
                SARSDs = self.env.step_to_next_action(actions)

                actions = dict()
                for idx, sarsd in SARSDs.items():
                    s, a, r, ns, d = sarsd

                    #print(sarsd)
                    self.Agents[idx].remember(s, a, r, ns, d)

                    # The next action must be chosen from "next_state", since that is now the simulator's current state
                    actions[idx] = int(self.Agents[idx].choose_action(ns))

                # End-of-episode saving and monitoring of the agents
                if self.env.done:
                    self.env.reset()
                    self.Random_Seed += 1
                    self.number_of_episode += 1
                    print('Episode {}: Finished running.'.format(
                        self.number_of_episode))

                    for idx, agent in self.Agents.items():
                        agent.average_reward = np.mean(agent.episode_reward)
                        agent.reward_storage.append(agent.average_reward)
                        print("Agent {}, Average Reward: {}".format(
                            idx, round(agent.average_reward, 2)))
                        agent.best_agent(self.vissim_working_directory,
                                         self.model_name, self.agent_type,
                                         self.Session_ID)
                        for i in range(self.learning_iterations):
                            agent.learn_batch(self.batch_size, 1)

                        if self.number_of_episode % self.copy_weights_frequency == 0:
                            agent.copy_weights()

                        agent.reset()

                    if self.number_of_episode % self.save_every == 0:
                        self.save(self.number_of_episode)

                    # Decrease the exploration rate
                    self.advance_schedule()

                    if self.number_of_episode != number_of_episode + 1:
                        print('Episode {}: Starting computation.'.format(
                            self.number_of_episode + 1))

                    break

        self.env = None
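
The call to agent.copy_weights() every copy_weights_frequency episodes is the usual hard update of a DQN target network. A minimal sketch, assuming Keras-style online and target models named self.model and self.target_model (hypothetical attribute names), would be:

def copy_weights(self):
    # Hard update: copy the online network's weights into the target network.
    self.target_model.set_weights(self.model.get_weights())
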
    def prepopulate_memory(self, vissim=False):
        """
        Fill each agent's replay memory, either by loading a previously saved
        experience file or by generating transitions from a fresh simulation.
        """

        # Check if a suitable folder exists
        prepopulation_directory = os.path.join(self.vissim_working_directory,
                                               self.model_name,
                                               "Agents_Results",
                                               self.agent_type,
                                               self.Session_ID)
        if not os.path.exists(prepopulation_directory):
            os.makedirs(prepopulation_directory)
        # Check if a suitable file exists
        if self.PER_activated:
            PER_prepopulation_filename = os.path.join(
                prepopulation_directory,
                'Agent' + str(0) + '_PERPre_' + str(self.memory_size) + '.p')
        else:
            PER_prepopulation_filename = os.path.join(
                prepopulation_directory,
                'Agent' + str(0) + '_Pre_' + str(self.memory_size) + '.p')

        prepopulation_exists = os.path.isfile(PER_prepopulation_filename)
        # If it does, process it into the memory
        if prepopulation_exists:
            if self.PER_activated:
                print("Previous Experience: Found. Loading into agents")
                for idx, agent in self.Agents.items():
                    PER_prepopulation_filename = os.path.join(
                        prepopulation_directory, 'Agent' + str(idx) +
                        '_PERPre_' + str(self.memory_size) + '.p')
                    memory = pickle.load(open(PER_prepopulation_filename,
                                              'rb'))
                    print(
                        "Previous Experience: Successfully loaded file from:")
                    print(PER_prepopulation_filename)
                    for s, a, r, ns, d in memory:
                        agent.remember(s, a, r, ns, d)
                    # Calculate importance sampling weights
                    update_priority_weights(agent, self.memory_size)

            else:
                for idx, agent in self.Agents.items():
                    PER_prepopulation_filename = os.path.join(
                        prepopulation_directory, 'Agent' + str(idx) + '_Pre_' +
                        str(self.memory_size) + '.p')
                    agent.memory = pickle.load(
                        open(PER_prepopulation_filename, 'rb'))
            return

        else:
            print("Experience file not found. Generating now...")
            # Keep the transitions collected for each agent's memory
            agents_memory = {}
            for idx, agent in self.Agents.items():
                agents_memory[idx] = []

            # The simulation length just needs to be long enough to fill the memories in one run
            self.env = environment(self.model_name, self.vissim_working_directory, self.sim_length,
                                   self.Model_dictionnary, self.actions_set, self.Random_Seed,
                                   timesteps_per_second=self.timesteps_per_second, mode='training',
                                   delete_results=True, verbose=True, vissim=vissim)

            memory_full = False
            # Counter of actions taken by the agents
            number_of_action_taken = 0

            start_state = self.env.get_state()
            actions = {}
            for idx, s in start_state.items():
                actions[idx] = int(self.Agents[idx].choose_action(s))

            while not memory_full:
                SARSDs = self.env.step_to_next_action(actions)

                if number_of_action_taken % 1000 == 0:
                    for idx, memory in agents_memory.items():
                        print("After {} actions taken by the Agents,  Agent {} memory is {} percent full"\
                                .format(number_of_action_taken, idx , np.round(100*len(memory)/self.memory_size,2)))

                actions = dict()

                for idx, sarsd in SARSDs.items():
                    s, a, r, ns, d = sarsd

                    #print(sarsd)
                    self.Agents[idx].remember(s, a, r, ns, d)

                    agents_memory[idx].append([s, a, r, ns, d])

                    # The next action must be chosen from "next_state", since that is now the simulator's current state
                    actions[idx] = int(self.Agents[idx].choose_action(ns))

                    number_of_action_taken += 1

                # check if all the agents have their memory full
                memory_full = True
                for idx, memory in agents_memory.items():
                    if len(memory) < self.memory_size:
                        memory_full = False

                # If the episode ends before the memories are full, reset and continue
                if self.env.done:
                    self.env.reset()

                    actions = {}
                    for idx, s in start_state.items():
                        actions[idx] = self.Agents[idx].choose_action(s)

            for idx, agent in self.Agents.items():
                if self.PER_activated:
                    update_priority_weights(agent, self.memory_size)
                    PER_prepopulation_filename = os.path.join(
                        prepopulation_directory, 'Agent' + str(idx) +
                        '_PERPre_' + str(self.memory_size) + '.p')

                    # Dump random transitions into pickle file for later prepopulation of PER
                    print("Memory filled. Saving as:" +
                          PER_prepopulation_filename)
                    pickle.dump(agents_memory[idx],
                                open(PER_prepopulation_filename, 'wb'))

                else:

                    PER_prepopulation_filename = os.path.join(
                        prepopulation_directory, 'Agent' + str(idx) + '_Pre_' +
                        str(self.memory_size) + '.p')
                    print("Memory filled. Saving as:" +
                          PER_prepopulation_filename)
                    pickle.dump(agents_memory[idx],
                                open(PER_prepopulation_filename, 'wb'))
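
The pickle calls above open files without closing them explicitly; a small pair of helpers using context managers (hypothetical names, same file format) keeps the save/load logic in one place:

import pickle

def save_transitions(path, transitions):
    # Persist a list of (s, a, r, ns, d) tuples and close the file handle.
    with open(path, "wb") as f:
        pickle.dump(transitions, f)

def load_transitions(path):
    with open(path, "rb") as f:
        return pickle.load(f)
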
    def test(self, vissim=False):
        """
                Function to test our agents on one episode with all the metrics : queues over time, delay
                Average reward of the agents.
                """

        if vissim is not False:
            vissim.Simulation.Stop()

        self.env = None
        self.env = environment(self.model_name,
                               self.vissim_working_directory,
                               self.sim_length,
                               self.Model_dictionnary,
                               actions_set='default_actions',
                               Random_Seed=self.Random_Seed,
                               timesteps_per_second=self.timesteps_per_second,
                               mode='test',
                               delete_results=True,
                               verbose=True,
                               vissim=vissim)

        # Counter used to change the demand during the test
        demand_counter = 0

        ## ATTENTION HERE. DEMAND CHANGES DEACTIVATED.
        #self.env.change_demand(self.env.vehicle_demand[demand_counter])
        #self.env.change_demand(self.env.vehicle_demand[demand_counter])

        # Initialisation of the metrics
        self.Episode_Queues = {}  # Queues at each junction
        self.Cumulative_Episode_Delays = {}  # Cumulative delay at each junction
        self.Cumulative_Episode_stop_Delays = {}  # Cumulative stop delay at each junction

        self.Cumulative_Totale_network_delay = [0]
        self.Cumulative_Totale_network_stop_delay = [0]

        queues = self.env.get_queues()
        for idx, junction_queues in queues.items():
            self.Episode_Queues[idx] = [junction_queues]

        delays = self.env.get_delays()
        for idx, junction_delay in delays.items():
            self.Cumulative_Episode_Delays[idx] = [junction_delay]

        stop_delays = self.env.get_stop_delays()
        for idx, junction_stop_delay in stop_delays.items():
            self.Cumulative_Episode_stop_Delays[idx] = [junction_stop_delay]

        for idx, agent in self.Agents.items():
            agent.reset()
            agent.epsilon = 0  #Set the exploration rate to 0

        start_state = self.env.get_state()

        actions = {}

        # Initialisation
        for idx, s in start_state.items():
            actions[idx] = self.Agents[idx].choose_action(s)

        # Simulation
        while not self.env.done:
            # Make the environment take a step
            SARSDs = self.env.step(actions)
            # Read the queues and store them
            queues = self.env.get_queues()
            for idx, junction_queues in queues.items():
                self.Episode_Queues[idx].append(junction_queues)
            # Do the same with the global delays
            delays = self.env.get_delays()
            for idx, junction_delay in delays.items():
                self.Cumulative_Episode_Delays[idx].append(
                    self.Cumulative_Episode_Delays[idx][-1] + junction_delay)
            # And again with the stop delay
            stop_delays = self.env.get_stop_delays()
            for idx, junction_stop_delay in stop_delays.items():
                self.Cumulative_Episode_stop_Delays[idx].append(
                    self.Cumulative_Episode_stop_Delays[idx][-1] +
                    junction_stop_delay)

            self.Cumulative_Totale_network_delay.append(
                self.Cumulative_Totale_network_delay[-1] +
                self.env.get_delay_timestep())
            self.Cumulative_Totale_network_stop_delay.append(
                self.Cumulative_Totale_network_stop_delay[-1] +
                self.env.get_stop_delay_timestep())

            # Whenever an action is required
            if self.env.action_required:

                actions = dict()
                for idx, sarsd in SARSDs.items():
                    s, a, r, ns, d = sarsd

                    self.Agents[idx].remember(s, a, r, ns, d)
                    # The next action must be chosen from "next_state", since that is now the simulator's current state
                    actions[idx] = int(self.Agents[idx].choose_action(ns))

            ## ATTENTION: CHANGE DEMAND DEACTIVATED
            #if self.env.global_counter% 360 == 0:
            #       demand_counter += 1
            #       self.env.change_demand(self.env.vehicle_demand[demand_counter])

        # Stop the simulation without erasing the database
        self.env.Stop_Simulation(delete_results=False)
        self.env = None
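
Unlike the earlier test, this version stores its metrics on the instance instead of returning them; a usage sketch (assuming a manager instance named runner, a hypothetical name) reads them back as attributes:

# Hypothetical usage sketch: run one test episode, then read the stored metrics.
runner.test()
print("Final cumulative network delay:",
      runner.Cumulative_Totale_network_delay[-1])
print("Final cumulative network stop delay:",
      runner.Cumulative_Totale_network_stop_delay[-1])
for idx, queue_history in runner.Episode_Queues.items():
    print("Junction {}: {} queue samples recorded".format(idx, len(queue_history)))
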