Example 1
 def __init__(self):
     self.environment = Environment(NUMBER_OF_DRONES, NUMBER_OF_HUMANS)
     self.agent = DDQNAgent(learning_rate=0.001,
                            epsilon=0.9,
                            epsilon_decay=0.99,
                            gamma=0.8,
                            batch_size=64,
                            buffer_size=10000,
                            min_memory_size=500,
                            tau=0.1)
     self.agent.model = self.agent.create_model()
     self.agent.target_model = self.agent.create_model()
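The tau value configured above is typically used for a soft (Polyak) update of the target network toward the online network. A minimal generic sketch of that pattern follows; the helper name is illustrative and the actual DDQNAgent implementation is not shown in these examples.

def soft_update_target(model, target_model, tau=0.1):
    # Blend each online weight into the corresponding target weight (Polyak averaging).
    online_weights = model.get_weights()
    target_weights = target_model.get_weights()
    blended = [tau * w + (1.0 - tau) * tw
               for w, tw in zip(online_weights, target_weights)]
    target_model.set_weights(blended)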
Example 2
def mc(args, params: EnvironmentParams):
    if args.num_agents is not None:
        num_range = [int(i) for i in args.num_agents]
        params.grid_params.num_agents_range = num_range

    try:
        env = Environment(params)
        env.agent.load_weights(args.weights)

        env.eval(int(args.samples), show=args.show)
    except AttributeError:
        print("Not overriding log dir, eval existing:")

    eval_logs("logs/training/" + args.id + "/test")
Example 3
    def __init__(self):
        self.clock: pygame.time.Clock = pygame.time.Clock()                     # PyGame Clock to set frame rate
        self.screen: pygame.Surface = pygame.display.set_mode((WIDTH, HEIGHT))  # Window where simulation is played
        self.environment: Environment = Environment()                           # Environment where the robot is placed
        self.robot: Robot = Robot(Const.START_POS)                              # Robot
        self.keys: List[Dict[str, Any]] = [                                     # Action keys with their corresponding callbacks
            dict(key_code=[pygame.K_KP7], callback=self.robot.increase_left, hold=False, pressed=False),
            dict(key_code=[pygame.K_KP4], callback=self.robot.decrease_left, hold=False, pressed=False),
            dict(key_code=[pygame.K_KP9], callback=self.robot.increase_right, hold=False, pressed=False),
            dict(key_code=[pygame.K_KP6], callback=self.robot.decrease_right, hold=False, pressed=False),
            dict(key_code=[pygame.K_KP8], callback=self.robot.increase_both, hold=False, pressed=False),
            dict(key_code=[pygame.K_KP5], callback=self.robot.decrease_both, hold=False, pressed=False),
            dict(key_code=[pygame.K_w], callback=self.robot.increase_both, hold=True, pressed=False),
            dict(key_code=[pygame.K_s], callback=self.robot.decrease_both, hold=True, pressed=False),
            dict(key_code=[pygame.K_x, pygame.K_r], callback=self.robot.stop, hold=False, pressed=False),
            dict(key_code=[pygame.K_a], callback=self.robot.rotate_left, hold=True, pressed=False),
            dict(key_code=[pygame.K_d], callback=self.robot.rotate_right, hold=True, pressed=False),
            dict(key_code=[pygame.K_KP_MULTIPLY], callback=self.robot.toggle_sensor, hold=False, pressed=False),
            dict(key_code=[pygame.K_m], callback=self.toggle_test_mode, hold=False, pressed=False),
            dict(key_code=[pygame.K_n], callback=self.do_robot_update, hold=False, pressed=False)
        ]

        self.done: bool = False                                                 # Window closed ?
        pygame.display.set_caption("ARS_Robot_Simulation")                      # Window title
        icon = pygame.image.load('images/robot.png')                            # Window icon
        pygame.display.set_icon(icon)
        self.test_mode = False
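For context, a minimal sketch of how a key table in this shape could be polled once per frame; the helper name is hypothetical and not part of the original source.

import pygame
from typing import Any, Dict, List

def dispatch_keys(keys: List[Dict[str, Any]]) -> None:
    # Invoke the callback of every entry whose key is pressed, honouring the hold flag.
    pressed = pygame.key.get_pressed()
    for entry in keys:
        down = any(pressed[code] for code in entry["key_code"])
        if entry["hold"]:
            if down:
                entry["callback"]()          # repeat the action while the key is held
        else:
            if down and not entry["pressed"]:
                entry["callback"]()          # fire once per key press (edge-triggered)
            entry["pressed"] = down          # remember the key state for the next frame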
Example 4
def testing_loop():
    
    agents = []
    for file in glob.glob("saved_agents\\policy_*.h5"):
        agents.append(file)
    # create the environment
    environment = Environment(NUMBER_OF_DRONES, NUMBER_OF_HUMANS)
    for agent in agents:
        print("for agent: " + agent)
        # reset the environment
        state = environment.reset()
        # load the agent from its file name
        agent = loadAgent(agent)
        agent.drone = environment.drones[0]
        for step in range(MAX_STEPS):
            # make_move updates the drone location
            action = agent.make_move(environment, state)
            # render the environment at the new location
            environment.render()
            state_, reward, done = environment.get_info(agent.drone)
            #self.agent.store_experience([state, action, reward, state_, done])
            state = state_
            if done:
                print("reached to goal state")
                break
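loadAgent is not shown in these excerpts. A plausible minimal sketch, assuming the policies are saved as Keras .h5 models (as in Examples 7 and 8) and that the agent exposes a model attribute; both assumptions are illustrative.

from tensorflow import keras

def loadAgent(path):
    # Hypothetical loader: restore the saved Keras model and attach it to a fresh agent.
    agent = PolicyAgent()                        # PolicyAgent is defined in the surrounding project
    agent.model = keras.models.load_model(path)  # assumed attribute; adapt to the real agent API
    return agent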
Example 5
    def __init__(self):
        # Saves an instance of the Environment class.
        self.__environment = Environment()

        # Saves an instance of the FunctionMapper class.
        self.__function_mapper = FunctionMapper()

        # Loads the module containing the special functions available to the user.
        self.__function_mapper.load_class(SpecialFunctions)

        # Loads the module containing the functions available to the user.
        self.__function_mapper.load_class(Functions)

        # Loads the module containing the predicates available to the user.
        self.__function_mapper.load_class(Predicates)

        # Saves an instance of the Evaluator class, which requires as parameters
        # the references to the instances of the Environment containing the global variables
        # and of the FunctionMapper containing the mappings between the names of the functions
        # and the predicates available to the users and the corresponding implementations.
        self.__evaluator = Evaluator(self.__environment,
                                     self.__function_mapper)
Example 6
 def check_collisions(self, environment: Environment,
                      current_pos: np.ndarray, next_pos: np.ndarray,
                      prev_collision: List[Collision]) -> np.ndarray:
     collisions = environment.collides(current_pos, next_pos)
     if len(collisions) == 0 or get_x_y(next_pos) == (0, 0):
         return next_pos
     else:
         closest_line = self.closest_collision(collisions, current_pos)
         t_current_pos, t_next_pos = self.recalc_next_pos(
             current_pos, next_pos, closest_line)
         # if self.recalc_next_pos(current_pos, next_pos, closest_line)[1].shape == (2,2):
         #     t_current_pos, t_next_pos = self.recalc_next_pos(current_pos, next_pos, closest_line)
         new_collisions = environment.collides(t_current_pos, t_next_pos)
         prev_collision.append(closest_line.line)
         if len(new_collisions) == 0:
             return t_next_pos
         else:
             if new_collisions[0].line in prev_collision:
                 return current_pos
             else:
                 return self.check_collisions(environment, t_current_pos,
                                              t_next_pos, prev_collision)
Example 7
class Main:
    train_results = "Train results 11042020F"

    def __init__(self):
        self.environment = Environment(NUMBER_OF_DRONES, NUMBER_OF_HUMANS)
        self.agent = PolicyAgent()  # create agent with default parameters
        #self.agent.load_model("saved_agents\\policy_agent.h5")

    # generating plots
    def generate_plot(self, data: list):
        fig, ax = plt.subplots()
        ax.plot(data, 'r', label=f"{self.train_results}")
        ax.set(xlabel="Episode", ylabel="Distance", title="")
        ax.grid()
        plt.legend(loc='upper left')
        plt.draw()
        fig.savefig(f'results\\{self.train_results}.png', dpi=1200)

    # training phase
    def training_loop(self):
        count = 0
        # iterate on epochs
        for episode in range(TOTAL_EPOCHS):
            print("Epoch # " + str(episode + 1))
            # reinitialize the environment
            state = self.environment.reset()
            # assign the drone whose location the agent will update
            self.agent.drone = self.environment.drones[0]
            # maximum number of steps to move
            for step in range(MAX_STEPS):
                # the agent reads the state and decides on a move; see make_move for details
                action = self.agent.make_move(self.environment, state)
                # make_move has updated the drone location, so render the environment
                self.environment.render()
                # get the reward, the next state and whether the goal state was reached
                state_, reward, done = self.environment.get_info(
                    self.agent.drone)
                self.agent.addState(state)
                self.agent.store_experience(
                    [state, action, reward, state_, done])
                state = state_
                # once enough experience has been collected, update the policy
                if len(self.agent.memory) > self.agent.min_memory_size:
                    self.agent.update(self.environment)
                if done:
                    break
            # decay the exploration rate
            self.agent.update_epsilon()
            # periodically store the model
            if (episode + 1) % (NUMBER_OF_EPOCHS) == 0:
                print("saving model...")
                self.agent.save_model(
                    "saved_agents\\policy_agent_{}.h5".format(count))
                count += 1
        # shut down the environment
        self.environment.close()
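Both this loop and the DDQN variant below only learn once the memory holds more than min_memory_size experiences. A generic sketch of that replay-buffer pattern, with hypothetical names not taken from these agents:

import random
from collections import deque

class ReplayBuffer:
    # Minimal FIFO experience buffer with a warm-up threshold before sampling starts.
    def __init__(self, buffer_size=10000, min_memory_size=500):
        self.memory = deque(maxlen=buffer_size)
        self.min_memory_size = min_memory_size

    def store_experience(self, experience):
        # experience is expected to be [state, action, reward, next_state, done]
        self.memory.append(experience)

    def sample(self, batch_size=64):
        if len(self.memory) <= self.min_memory_size:
            return []  # not enough data yet; the caller skips the update
        return random.sample(self.memory, min(batch_size, len(self.memory)))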
Example 8
class Main:
    train_results = "Train results 11042020F"

    def __init__(self):
        self.environment = Environment(NUMBER_OF_DRONES, NUMBER_OF_HUMANS)
        self.agent = DDQNAgent(learning_rate=0.001,
                               epsilon=0.9,
                               epsilon_decay=0.99,
                               gamma=0.8,
                               batch_size=64,
                               buffer_size=10000,
                               min_memory_size=500,
                               tau=0.1)
        self.agent.model = self.agent.create_model()
        self.agent.target_model = self.agent.create_model()

    # generating plots
    def generate_plot(self, data: list):
        fig, ax = plt.subplots()
        ax.plot(data, 'r', label=f"{self.train_results}")
        ax.set(xlabel="Episode", ylabel="Distance", title="")
        ax.grid()
        plt.legend(loc='upper left')
        plt.draw()
        fig.savefig(f'results\\{self.train_results}.png', dpi=1200)

    # training phase
    def training_loop(self):
        for episode in range(NUMBER_OF_EPOCHS):
            print(episode)
            state = self.environment.reset()
            self.agent.drone = self.environment.drones[0]
            for step in range(MAX_STEPS):
                action = self.agent.make_move(self.environment, state)
                self.environment.render()
                state_, reward, done = self.environment.get_info(self.agent.drone)
                self.agent.store_experience([state, action, reward, state_, done])
                state = state_

                if len(self.agent.memory) > self.agent.min_memory_size:
                    self.agent.update(self.environment)
                    self.agent.update_target_weights()

                if done:
                    break

            self.agent.update_epsilon()

        self.generate_plot(self.environment.drones[0].distance_history)
        # self.agent.generate_loss()
        self.agent.save_model()
        self.environment.close()
        # self.testing_loop()

    def testing_loop(self):
        model = keras.models.load_model("saved_agents\\agent.h5")
Example 9
class Checker:
    def __init__(self):
        self.env = Environment()
    
    def reminder(self, file):
        time = self.env.getTime()
        if time > 17 and self.env.wavWasPlayed(file) is False:
            self.env.playWavFile(file)
        else:
            self.env.resetWav(file)
Example 10
def build_network(text, is_with_file):
    parser = Parser()
    builder = Builder()
    function_mapper = FunctionMapper()
    environment = Environment()
    strategy = BreadthStrategy()

    if is_with_file:
        parsed_file = parser.parse_file(text)

        (facts, rules) = builder.build(parsed_file)
        evaluator = builder.evaluator
        network = Network(evaluator, strategy)

        print("Building network from file")
        result = network.build_network(facts, rules)

        print("Network built - working on transforming it!")
        transformed = transform_states(result)

        print("Transformed - > returning result")
        return FileWithStates(get_text(text), transformed)
    else:
        parsed_text = parser.parseProgram(text)
        (facts, rules) = builder.build(parsed_text)
        evaluator = builder.evaluator
        network = Network(evaluator, strategy)

        print("Building network from text")

        result = network.build_network(facts, rules)

        print("Network built - working on transforming it!")
        transformed = transform_states(result)

        print("Transformed -> returning result")

        return transformed
Example 11
 def __init__(self):
     self.environment = Environment(NUMBER_OF_DRONES, NUMBER_OF_HUMANS)
     self.agent = PolicyAgent()  # create agent with default parameters
Example 12
    parser.add_argument('--generate_config',
                        action='store_true',
                        help='Enable to write default config only')
    parser.add_argument('--config', default=None, help='Path to config file')
    parser.add_argument('--id',
                        default=None,
                        help='Overrides the logfile name and the save name')

    args = parser.parse_args()

    if args.generate_config:
        generate_config(EnvironmentParams(), "config/default.json")
        exit(0)

    if args.config is None:
        print("Config file needed!")
        exit(1)

    if not args.gpu:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    params = read_config(args.config)

    if args.id is not None:
        params.model_stats_params.save_model = "models/" + args.id
        params.model_stats_params.log_file_name = args.id

    env = Environment(params)

    env.run()
Example 13
        from src.GPSHandler import GPSHandler
        gpsHandler = GPSHandler(environment)
        gpsHandler.start()

        # Initiate LiveData
        from src.LiveData import LiveData
        liveData = LiveData(environment)
        liveData.start()

    # ECUHandler
    from src.ECUHandler import ECUHandler
    ecuHandler = ECUHandler(environment)
    ecuHandler.start()


try:
    # Environment
    from src.Environment import Environment
    environment = Environment()
    environment.start()
    gui = GUI(environment)

    thread.start_new_thread(initClasses, ())

    #Start gui and enter its mainloop
    gui.start()

except (KeyboardInterrupt, SystemExit):
    print("Exiting...")
    sys.exit()
Example 14
 def __init__(self):
     self.env = Environment()
Example 15
import os
import random
import numpy as np
import tensorflow as tf
from src.Environment import Environment
from src.Agents import DQNAgent
from src.utils import mkdirs, save_plot, save_animation
tf.random.set_seed(1)
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)
random.seed(a=0)
np.random.seed(0)
output_dir = mkdirs(os.path.join(os.path.dirname(__file__), "output"))
models_dir = mkdirs(os.path.join(output_dir, "models"))
animations_dir = mkdirs(os.path.join(output_dir, "animations"))
log_file = os.path.join(output_dir, "log.csv")

env = Environment(n_good=0, n_bad=110)
agent = DQNAgent(env, n_sectors=4, sector_radius=1.0)
save_models = False
save_animations = True
n_episodes = 1000
iter_max = 2000
n_reward_max = 0
loss = -1  # track loss
for episode in range(n_episodes):
    iter = 0
    env.reset()  # reset environment
    ob = agent.observe(env)  # observe
    while iter < iter_max:
        action = agent.get_action(ob)  # follow epsilon-greedy policy
        state_next, reward, done = env.step(action)  # evolve
        ob_next = agent.observe(env)  # observe
Example 16
def main():
    # ====================
    # Run
    # ====================
    outDir = make_dir(os.path.join(os.path.dirname(__file__), "output"))
    modelDir = make_dir(os.path.join(outDir, "models"))
    aniDir = make_dir(os.path.join(outDir, "animations"))
    logFile = os.path.join(outDir, "log.csv")

    env = Environment(n_good=0, n_bad=110)
    agent = DQNAgent(env=env, sensors=Sensors(n_sectors=4, sector_radius=1.0))

    save_models = True
    save_animations = True
    n_episodes = 1000
    n_iter_max = 2000
    n_reward_max = 0
    loss = -1  # track training loss
    for episode in range(n_episodes):

        # Generate episode
        state = env.reset()  # initialize environment
        obs = agent.observe(state, env)  # observe
        states, rewards, i = [], [], 0
        while i < n_iter_max:
            action = agent.get_action(obs)  # follow epsilon-greedy policy
            state_next, reward, done = env.step(state, action)  # evolve
            obs_next = agent.observe(state, env)  # observe
            agent.memorize((obs, action, reward, obs_next, done))  # memorize
            rewards.append(reward)
            states.append(state_next)
            state, obs = state_next, obs_next  # transition
            i += 1
            if done: break  # terminate

        # Track highly successful episodes
        n_reward_max += (sum(rewards) >= 2000)

        # Print progress
        print(
            "[ep {}/{}] iter={}/{}, rew={:.0f}, nrewmax={}, mem={}, eps={:.3f}, loss={:.2f}"
            .format(episode + 1, n_episodes, i, n_iter_max, sum(rewards),
                    n_reward_max, len(agent.memory), agent.epsilon, loss),
            flush=True)

        if (episode == 0 or n_reward_max % 5 == 1):
            # Save model
            if save_models:
                modelFile = os.path.join(
                    modelDir,
                    "model_ep={}_rew={}.h5".format(episode + 1,
                                                   int(sum(rewards))))
                print("    -> saving agent model = {}".format(modelFile),
                      flush=True)
                agent.save_model(modelFile)
            # Save animation
            if save_animations:
                aniFile = os.path.join(
                    aniDir,
                    "ani_ep={}_rew={}.mp4".format(episode + 1,
                                                  int(sum(rewards))))
                print("    -> saving animation = {}".format(aniFile),
                      flush=True)
                env.make_animation(states,
                                   env,
                                   agent.sensors,
                                   save_ani=True,
                                   filename=aniFile)
            n_reward_max += 0 if (episode == 0) else 1

        # Train agent
        loss = agent.train()  # automatically adjusts epsilon

        # Save log
        header = ["episode", "iter", "reward", "loss", "epsilon", "time"]
        values = [
            episode + 1, i,
            sum(rewards), loss, agent.epsilon,
            datetime.datetime.now().strftime("%B %d %Y %I:%M:%S %p")
        ]
        with open(logFile, ('w' if episode == 0 else 'a')) as f:
            writer = csv.writer(f)
            if episode == 0:
                writer.writerow(header)
            writer.writerow(values)

        if (episode + 1) % 20 == 0 or (episode == n_episodes - 1):
            df = pd.read_csv(logFile)
            if df.shape[0] > 50:
                save_plot(["episode", "reward"],
                          df,
                          color=(13 / 255, 28 / 255, 164 / 255),
                          n_bins=50)
                save_plot(["episode", "loss"],
                          df,
                          color=(195 / 255, 0 / 255, 0 / 255),
                          n_bins=50)
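get_action above follows an epsilon-greedy policy. A minimal generic sketch of that selection rule; the function name and the Q-value input are illustrative, not taken from this codebase.

import numpy as np

def epsilon_greedy(q_values: np.ndarray, epsilon: float) -> int:
    # Explore with probability epsilon, otherwise exploit the current Q-estimates.
    if np.random.rand() < epsilon:
        return int(np.random.randint(len(q_values)))  # random exploratory action
    return int(np.argmax(q_values))                   # greedy action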
Example 17
class Builder(object):
    """
    Class for the building of the working memory and
    the production memory after the parsing and
    the preliminary evaluation of a sequence
    of facts and rules.
    """
    def __init__(self):
        # Saves an instance of the Environment class.
        self.__environment = Environment()

        # Saves an instance of the FunctionMapper class.
        self.__function_mapper = FunctionMapper()

        # Loads the module containing the special functions available to the user.
        self.__function_mapper.load_class(SpecialFunctions)

        # Loads the module containing the functions available to the user.
        self.__function_mapper.load_class(Functions)

        # Loads the module containing the predicates available to the user.
        self.__function_mapper.load_class(Predicates)

        # Saves an instance of the Evaluator class, which requires as parameters
        # the references to the instances of the Environment containing the global variables
        # and of the FunctionMapper containing the mappings between the names of the functions
        # and the predicates available to the users and the corresponding implementations.
        self.__evaluator = Evaluator(self.__environment,
                                     self.__function_mapper)

    @property
    def evaluator(self):
        return self.__evaluator

    @evaluator.setter
    def evaluator(self, value):
        self.__evaluator = value

    def build(self, AST):
        # Saves a list to contain the facts.
        facts = []

        # Saves a list to contain the rules.
        rules = []

        for (item_type, item_content) in AST:
            # If the construct corresponds to the definition of a global variable, then
            # it is evaluated and the result is added in the set of global variables.
            if item_type == 'DEFGLOBAL_CONSTRUCT':
                for assignment in item_content:
                    print('Defining defglobal:', assignment.name)
                    assignment.content = self.__evaluator.evaluate(
                        assignment.content)
                    self.__environment.set_global_variable(assignment)

            # If the construct corresponds to the definition of a list of facts, then
            # the attributes of each fact are evaluated and the fact is added to the list of facts.
            elif item_type == 'DEFFACTS_CONSTRUCT':
                print('Defining deffacts:', item_content[0])
                for fact in item_content[1:]:
                    for i in range(len(fact[1])):
                        # Evaluates each attribute of the considered fact.
                        fact[1][i] = self.__evaluator.evaluate(fact[1][i])

                    # Saves an instance of the considered fact
                    # specifying its name and its attributes.
                    f = OrderedFact(fact[0], fact[1])

                    # Adds the current fact to the list of facts.
                    facts.append(f)

            # If the construct corresponds to the definition of a rule, then
            # its left and right parts are preliminarily evaluated and the rule
            # is added to the list of rules. Any non-global variables
            # will be evaluated after the matching phase between facts and rules.
            elif item_type == 'DEFRULE_CONSTRUCT':
                name = item_content[0][0]
                print('Defining defrule:', name)

                # Saves an instance of the considered rule.
                r = Rule(name)

                # If the rule contains a value of the salience,
                # then it is saved in the instance of the rule.
                if len(item_content[0]) > 1:
                    if item_content[0][1][0] == 'salience':
                        r.salience = item_content[0][1][1].content

                # Saves the left part in the instance of the rule.
                lhs = item_content[1]

                # Evaluates each element of the left part of the rule.
                for i in range(len(lhs)):

                    # Checks if the element of the left part of the rule represents a test.
                    if isinstance(lhs[i][0], SpecialTestCallType):

                        # Builds the set of the variables contained in the test.
                        lhs[i][0].build()

                        # Builds, in the rule containing the test, a dictionary in the form:
                        # variable_involved_in_the_test : list_of_references_to_tests_which_contains_it.
                        for v in lhs[i][0].test_variables:
                            if v not in r.variable_tests:
                                r.variable_tests[v] = {lhs[i][0]}
                            else:
                                r.variable_tests[v].add(lhs[i][0])

                        # Adds the test to the instance of the rule.
                        r.tests.add(lhs[i][0])

                    else:
                        # If the current element of the left part
                        # of the rule is not a test, then it is evaluated.
                        lhs[i] = self.__evaluator.evaluate(lhs[i])

                        # Adds the current element to the left part of the instance of the rule.
                        r.lhs.append(lhs[i])

                # Evaluates the complexity of the rule.
                r.evaluate_complexity()

                # Saves the right part in the instance of the rule.
                r.rhs = item_content[2]

                # Adds the rule to the list of rules.
                rules.append(r)

        # Returns the lists of identified facts and rules.
        return (facts, rules)

    def build_assert(self, ast):
        facts = []

        for fact in ast[1:]:
            print('Defining fact:', fact[0])

            for i in range(len(fact[1])):
                # Evaluates each attribute of the considered fact.
                fact[1][i] = self.__evaluator.evaluate(fact[1][i])

            # Saves an instance of the considered fact
            # specifying its name and its attributes.
            f = OrderedFact(fact[0], fact[1])

            # Adds the considered fact to the Working Memory.
            facts.append(f)

        return facts

    def reset(self):
        self.__environment.clear_local_variables()
        self.__environment.clear_test_variables()
        self.__evaluator = Evaluator(self.__environment,
                                     self.__function_mapper)