def __init__(self):
    self.environment = Environment(NUMBER_OF_DRONES, NUMBER_OF_HUMANS)
    self.agent = DDQNAgent(learning_rate=0.001, epsilon=0.9, epsilon_decay=0.99,
                           gamma=0.8, batch_size=64, buffer_size=10000,
                           min_memory_size=500, tau=0.1)
    self.agent.model = self.agent.create_model()
    self.agent.target_model = self.agent.create_model()
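# A minimal sketch (not from the source) of the Double DQN target computation that
# DDQNAgent.update presumably performs on a sampled batch: the online model selects the
# next action, the target model evaluates it. The function name ddqn_targets and the
# Keras-style predict() calls are assumptions for illustration only.
import numpy as np

def ddqn_targets(model, target_model, batch, gamma=0.8):
    states, actions, rewards, next_states, dones = map(np.array, zip(*batch))
    q_values = model.predict(states, verbose=0)                 # current Q estimates
    next_online = model.predict(next_states, verbose=0)         # action-selection network
    next_target = target_model.predict(next_states, verbose=0)  # action-evaluation network
    best_actions = np.argmax(next_online, axis=1)
    idx = np.arange(len(batch))
    targets = q_values.copy()
    targets[idx, actions.astype(int)] = rewards + gamma * (1 - dones) * next_target[idx, best_actions]
    return states, targets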
def mc(args, params: EnvironmentParams):
    if args.num_agents is not None:
        num_range = [int(i) for i in args.num_agents]
        params.grid_params.num_agents_range = num_range
    try:
        env = Environment(params)
        env.agent.load_weights(args.weights)
        env.eval(int(args.samples), show=args.show)
    except AttributeError:
        print("Not overriding log dir, eval existing:")
        eval_logs("logs/training/" + args.id + "/test")
def __init__(self):
    self.clock: pygame.time.Clock = pygame.time.Clock()                     # PyGame clock to set the frame rate
    self.screen: pygame.Surface = pygame.display.set_mode((WIDTH, HEIGHT))  # Window where the simulation is drawn
    self.environment: Environment = Environment()                           # Environment where the robot is placed
    self.robot: Robot = Robot(Const.START_POS)                              # Robot
    self.keys: List[Dict[str, Any]] = [                                     # Action keys with their callbacks
        dict(key_code=[pygame.K_KP7], callback=self.robot.increase_left, hold=False, pressed=False),
        dict(key_code=[pygame.K_KP4], callback=self.robot.decrease_left, hold=False, pressed=False),
        dict(key_code=[pygame.K_KP9], callback=self.robot.increase_right, hold=False, pressed=False),
        dict(key_code=[pygame.K_KP6], callback=self.robot.decrease_right, hold=False, pressed=False),
        dict(key_code=[pygame.K_KP8], callback=self.robot.increase_both, hold=False, pressed=False),
        dict(key_code=[pygame.K_KP5], callback=self.robot.decrease_both, hold=False, pressed=False),
        dict(key_code=[pygame.K_w], callback=self.robot.increase_both, hold=True, pressed=False),
        dict(key_code=[pygame.K_s], callback=self.robot.decrease_both, hold=True, pressed=False),
        dict(key_code=[pygame.K_x, pygame.K_r], callback=self.robot.stop, hold=False, pressed=False),
        dict(key_code=[pygame.K_a], callback=self.robot.rotate_left, hold=True, pressed=False),
        dict(key_code=[pygame.K_d], callback=self.robot.rotate_right, hold=True, pressed=False),
        dict(key_code=[pygame.K_KP_MULTIPLY], callback=self.robot.toggle_sensor, hold=False, pressed=False),
        dict(key_code=[pygame.K_m], callback=self.toggle_test_mode, hold=False, pressed=False),
        dict(key_code=[pygame.K_n], callback=self.do_robot_update, hold=False, pressed=False)
    ]
    self.done: bool = False                                                  # Window closed?
    pygame.display.set_caption("ARS_Robot_Simulation")                       # Window title
    icon = pygame.image.load('images/robot.png')                             # Window icon
    pygame.display.set_icon(icon)
    self.test_mode = False
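# A hypothetical sketch (not part of the source) of how the self.keys bindings above
# could be polled once per frame: "hold" bindings fire on every frame while the key is
# down, the others fire once per key press. The method name handle_keys is an assumption.
def handle_keys(self) -> None:
    pressed = pygame.key.get_pressed()
    for binding in self.keys:
        down = any(pressed[code] for code in binding["key_code"])
        if binding["hold"]:
            if down:
                binding["callback"]()          # fire every frame while held
        else:
            if down and not binding["pressed"]:
                binding["callback"]()          # fire once on the key-down edge
            binding["pressed"] = down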
def testing_loop():
    agents = []
    for file in glob.glob("saved_agents\\policy_*.h5"):
        agents.append(file)
    # create the environment
    environment = Environment(NUMBER_OF_DRONES, NUMBER_OF_HUMANS)
    for agent_file in agents:
        print("for agent: " + agent_file)
        # reset the environment
        state = environment.reset()
        # load the agent from its file name
        agent = loadAgent(agent_file)
        agent.drone = environment.drones[0]
        for step in range(MAX_STEPS):
            # make_move updates the drone location
            action = agent.make_move(environment, state)
            # render the environment at the new location
            environment.render()
            state_, reward, done = environment.get_info(agent.drone)
            # self.agent.store_experience([state, action, reward, state_, done])
            state = state_
            if done:
                print("reached the goal state")
                break
def __init__(self):
    # Saves an instance of the Environment class.
    self.__environment = Environment()
    # Saves an instance of the FunctionMapper class.
    self.__function_mapper = FunctionMapper()
    # Loads the module containing the special functions available to the user.
    self.__function_mapper.load_class(SpecialFunctions)
    # Loads the module containing the functions available to the user.
    self.__function_mapper.load_class(Functions)
    # Loads the module containing the predicates available to the user.
    self.__function_mapper.load_class(Predicates)
    # Saves an instance of the Evaluator class, which takes as parameters the references
    # to the Environment instance holding the global variables and to the FunctionMapper
    # instance holding the mappings between the names of the functions and predicates
    # available to the user and their implementations.
    self.__evaluator = Evaluator(self.__environment, self.__function_mapper)
def check_collisions(self, environment: Environment, current_pos: np.ndarray,
                     next_pos: np.ndarray, prev_collision: List[Collision]) -> np.ndarray:
    collisions = environment.collides(current_pos, next_pos)
    if len(collisions) == 0 or get_x_y(next_pos) == (0, 0):
        return next_pos
    else:
        # Recompute the step against the closest colliding wall and re-check it.
        closest_line = self.closest_collision(collisions, current_pos)
        t_current_pos, t_next_pos = self.recalc_next_pos(current_pos, next_pos, closest_line)
        # if self.recalc_next_pos(current_pos, next_pos, closest_line)[1].shape == (2, 2):
        #     t_current_pos, t_next_pos = self.recalc_next_pos(current_pos, next_pos, closest_line)
        new_collisions = environment.collides(t_current_pos, t_next_pos)
        prev_collision.append(closest_line.line)
        if len(new_collisions) == 0:
            return t_next_pos
        elif new_collisions[0].line in prev_collision:
            # This wall was already handled; stop to avoid infinite recursion.
            return current_pos
        else:
            return self.check_collisions(environment, t_current_pos, t_next_pos, prev_collision)
class Main:
    train_results = "Train results 11042020F"

    def __init__(self):
        self.environment = Environment(NUMBER_OF_DRONES, NUMBER_OF_HUMANS)
        self.agent = PolicyAgent()  # create agent with default parameters
        # self.agent.load_model("saved_agents\\policy_agent.h5")

    # plot generation
    def generate_plot(self, data: list):
        fig, ax = plt.subplots()
        ax.plot(data, 'r', label=f"{self.train_results}")
        ax.set(xlabel="Episode", ylabel="Distance", title="")
        ax.grid()
        plt.legend(loc='upper left')
        plt.draw()
        fig.savefig(f'results\\{self.train_results}.png', dpi=1200)

    # training phase
    def training_loop(self):
        count = 0
        # iterate over the epochs
        for episode in range(TOTAL_EPOCHS):
            print("Epoch # " + str(episode + 1))
            # reinitialize the environment
            state = self.environment.reset()
            # attach the agent to the drone whose location it updates
            self.agent.drone = self.environment.drones[0]
            # maximum number of steps per episode
            for step in range(MAX_STEPS):
                # the agent reads the state and decides its move (see make_move for details)
                action = self.agent.make_move(self.environment, state)
                # make_move has updated the drone location, so render the environment
                self.environment.render()
                # get the reward, the next state and whether the goal state was reached
                state_, reward, done = self.environment.get_info(self.agent.drone)
                self.agent.addState(state)
                self.agent.store_experience([state, action, reward, state_, done])
                state = state_
                # once enough experience is stored, update the policy
                if len(self.agent.memory) > self.agent.min_memory_size:
                    self.agent.update(self.environment)
                if done:
                    break
            # decay the exploration rate
            self.agent.update_epsilon()
            # periodically save the model
            if (episode + 1) % NUMBER_OF_EPOCHS == 0:
                print("saving model...")
                self.agent.save_model("saved_agents\\policy_agent_{}.h5".format(count))
                count += 1
        # shut down the environment
        self.environment.close()
class Main:
    train_results = "Train results 11042020F"

    def __init__(self):
        self.environment = Environment(NUMBER_OF_DRONES, NUMBER_OF_HUMANS)
        self.agent = DDQNAgent(learning_rate=0.001, epsilon=0.9, epsilon_decay=0.99,
                               gamma=0.8, batch_size=64, buffer_size=10000,
                               min_memory_size=500, tau=0.1)
        self.agent.model = self.agent.create_model()
        self.agent.target_model = self.agent.create_model()

    # plot generation
    def generate_plot(self, data: list):
        fig, ax = plt.subplots()
        ax.plot(data, 'r', label=f"{self.train_results}")
        ax.set(xlabel="Episode", ylabel="Distance", title="")
        ax.grid()
        plt.legend(loc='upper left')
        plt.draw()
        fig.savefig(f'results\\{self.train_results}.png', dpi=1200)

    # training phase
    def training_loop(self):
        for episode in range(NUMBER_OF_EPOCHS):
            print(episode)
            state = self.environment.reset()
            self.agent.drone = self.environment.drones[0]
            for step in range(MAX_STEPS):
                action = self.agent.make_move(self.environment, state)
                self.environment.render()
                state_, reward, done = self.environment.get_info(self.agent.drone)
                self.agent.store_experience([state, action, reward, state_, done])
                state = state_
                if len(self.agent.memory) > self.agent.min_memory_size:
                    self.agent.update(self.environment)
                    self.agent.update_target_weights()
                if done:
                    break
            self.agent.update_epsilon()
        self.generate_plot(self.environment.drones[0].distance_history)
        # self.agent.generate_loss()
        self.agent.save_model()
        self.environment.close()
        # self.testing_loop()

    def testing_loop(self):
        model = keras.models.load_model("saved_agents\\agent.h5")
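# A minimal sketch (an assumption, not the repository's code) of the soft (Polyak)
# target update implied by tau=0.1: after each training step the target network is
# nudged towards the online network instead of being copied wholesale.
def update_target_weights(self):
    online_weights = self.model.get_weights()
    target_weights = self.target_model.get_weights()
    self.target_model.set_weights(
        [self.tau * w + (1.0 - self.tau) * t for w, t in zip(online_weights, target_weights)]
    )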
class Checker:
    def __init__(self):
        self.env = Environment()

    def reminder(self, file):
        time = self.env.getTime()
        if time > 17 and not self.env.wavWasPlayed(file):
            self.env.playWavFile(file)
        else:
            self.env.resetWav(file)
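# Illustrative use of Checker (the wav file name below is hypothetical): the reminder
# is intended to play the file only after 17:00 and only if it has not been played yet.
checker = Checker()
checker.reminder("reminder.wav")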
def build_network(text, is_with_file):
    parser = Parser()
    builder = Builder()
    function_mapper = FunctionMapper()
    environment = Environment()
    strategy = BreadthStrategy()
    if is_with_file:
        parsed_file = parser.parse_file(text)
        (facts, rules) = builder.build(parsed_file)
        evaluator = builder.evaluator
        network = Network(evaluator, strategy)
        print("Building network from file")
        result = network.build_network(facts, rules)
        print("Network built - working on transforming it!")
        transformed = transform_states(result)
        print("Transformed -> returning result")
        return FileWithStates(get_text(text), transformed)
    else:
        parsed_text = parser.parseProgram(text)
        (facts, rules) = builder.build(parsed_text)
        evaluator = builder.evaluator
        network = Network(evaluator, strategy)
        print("Building network from text")
        result = network.build_network(facts, rules)
        print("Network built - working on transforming it!")
        transformed = transform_states(result)
        print("Transformed -> returning result")
        return transformed
def __init__(self):
    self.environment = Environment(NUMBER_OF_DRONES, NUMBER_OF_HUMANS)
    self.agent = PolicyAgent()  # create agent with default parameters
parser.add_argument('--generate_config', action='store_true', help='Enable to write default config only')
parser.add_argument('--config', default=None, help='Path to config file')
parser.add_argument('--id', default=None, help='Overrides the logfile name and the save name')

args = parser.parse_args()

if args.generate_config:
    generate_config(EnvironmentParams(), "config/default.json")
    exit(0)

if args.config is None:
    print("Config file needed!")
    exit(1)

if not args.gpu:
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

params = read_config(args.config)

if args.id is not None:
    params.model_stats_params.save_model = "models/" + args.id
    params.model_stats_params.log_file_name = args.id

env = Environment(params)
env.run()
# The handler blocks below use the shared `environment`; in the source they are
# presumably started from initClasses() only after the Environment has been created
# in the try block further down.
from src.GPSHandler import GPSHandler
gpsHandler = GPSHandler(environment)
gpsHandler.start()

# Initiate LiveData
from src.LiveData import LiveData
liveData = LiveData(environment)
liveData.start()

# ECUHandler
from src.ECUHandler import ECUHandler
ecuHandler = ECUHandler(environment)
ecuHandler.start()

try:
    # Environment
    from src.Environment import Environment
    environment = Environment()
    environment.start()

    gui = GUI(environment)
    thread.start_new_thread(initClasses, ())

    # Start the gui and enter its mainloop
    gui.start()
except (KeyboardInterrupt, SystemExit):
    print("Exiting...")
    sys.exit()
def __init__(self):
    self.env = Environment()
import os
import random

import numpy as np
import tensorflow as tf

from src.Environment import Environment
from src.Agents import DQNAgent
from src.utils import mkdirs, save_plot, save_animation

tf.random.set_seed(1)
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)
random.seed(a=0)
np.random.seed(0)

output_dir = mkdirs(os.path.join(os.path.dirname(__file__), "output"))
models_dir = mkdirs(os.path.join(output_dir, "models"))
animations_dir = mkdirs(os.path.join(output_dir, "animations"))
log_file = os.path.join(output_dir, "log.csv")

env = Environment(n_good=0, n_bad=110)
agent = DQNAgent(env, n_sectors=4, sector_radius=1.0)

save_models = False
save_animations = True
n_episodes = 1000
iter_max = 2000
n_reward_max = 0
loss = -1  # track loss

for episode in range(n_episodes):
    iter = 0
    env.reset()              # reset environment
    ob = agent.observe(env)  # observe
    while iter < iter_max:
        action = agent.get_action(ob)                 # follow epsilon-greedy policy
        state_next, reward, done = env.step(action)   # evolve
        ob_next = agent.observe(env)                  # observe
def main():
    # ====================
    # Run
    # ====================
    outDir = make_dir(os.path.join(os.path.dirname(__file__), "output"))
    modelDir = make_dir(os.path.join(outDir, "models"))
    aniDir = make_dir(os.path.join(outDir, "animations"))
    logFile = os.path.join(outDir, "log.csv")

    env = Environment(n_good=0, n_bad=110)
    agent = DQNAgent(env=env, sensors=Sensors(n_sectors=4, sector_radius=1.0))

    save_models = True
    save_animations = True
    n_episodes = 1000
    n_iter_max = 2000
    n_reward_max = 0
    loss = -1  # track training loss

    for episode in range(n_episodes):
        # Generate episode
        state = env.reset()                  # initialize environment
        obs = agent.observe(state, env)      # observe
        states, rewards, i = [], [], 0
        while i < n_iter_max:
            action = agent.get_action(obs)                          # follow epsilon-greedy policy
            state_next, reward, done = env.step(state, action)      # evolve
            obs_next = agent.observe(state_next, env)               # observe the new state
            agent.memorize((obs, action, reward, obs_next, done))   # memorize
            rewards.append(reward)
            states.append(state_next)
            state, obs = state_next, obs_next  # transition
            i += 1
            if done:
                break  # terminate

        # Track highly successful episodes
        n_reward_max += (sum(rewards) >= 2000)

        # Print progress
        print("[ep {}/{}] iter={}/{}, rew={:.0f}, nrewmax={}, mem={}, eps={:.3f}, loss={:.2f}"
              .format(episode + 1, n_episodes, i, n_iter_max, sum(rewards), n_reward_max,
                      len(agent.memory), agent.epsilon, loss), flush=True)

        if episode == 0 or n_reward_max % 5 == 1:
            # Save model
            if save_models:
                modelFile = os.path.join(modelDir, "model_ep={}_rew={}.h5".format(episode + 1, int(sum(rewards))))
                print(" -> saving agent model = {}".format(modelFile), flush=True)
                agent.save_model(modelFile)
            # Save animation
            if save_animations:
                aniFile = os.path.join(aniDir, "ani_ep={}_rew={}.mp4".format(episode + 1, int(sum(rewards))))
                print(" -> saving animation = {}".format(aniFile), flush=True)
                env.make_animation(states, env, agent.sensors, save_ani=True, filename=aniFile)
            n_reward_max += 0 if episode == 0 else 1

        # Train agent
        loss = agent.train()  # automatically adjusts epsilon

        # Save log
        header = ["episode", "iter", "reward", "loss", "epsilon", "time"]
        values = [episode + 1, i, sum(rewards), loss, agent.epsilon,
                  datetime.datetime.now().strftime("%B %d %Y %I:%M:%S %p")]
        with open(logFile, 'w' if episode == 0 else 'a') as f:
            writer = csv.writer(f)
            if episode == 0:
                writer.writerow(header)
            writer.writerow(values)

        if (episode + 1) % 20 == 0 or episode == n_episodes - 1:
            df = pd.read_csv(logFile)
            if df.shape[0] > 50:
                save_plot(["episode", "reward"], df, color=(13 / 255, 28 / 255, 164 / 255), n_bins=50)
                save_plot(["episode", "loss"], df, color=(195 / 255, 0 / 255, 0 / 255), n_bins=50)
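# A minimal sketch (an assumption, not the project's code) of the epsilon-greedy policy
# that agent.get_action(obs) follows: explore with probability epsilon, otherwise act
# greedily on the predicted Q-values. The attribute names n_actions and model are
# placeholders for whatever the real DQNAgent exposes.
import numpy as np

def get_action(self, obs):
    if np.random.rand() < self.epsilon:
        return np.random.randint(self.n_actions)                   # explore
    q_values = self.model.predict(obs[np.newaxis, ...], verbose=0)
    return int(np.argmax(q_values[0]))                             # exploit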
class Builder(object):
    """
    Class for building the working memory and the production memory after the
    parsing and the preliminary evaluation of a sequence of facts and rules.
    """

    def __init__(self):
        # Saves an instance of the Environment class.
        self.__environment = Environment()
        # Saves an instance of the FunctionMapper class.
        self.__function_mapper = FunctionMapper()
        # Loads the module containing the special functions available to the user.
        self.__function_mapper.load_class(SpecialFunctions)
        # Loads the module containing the functions available to the user.
        self.__function_mapper.load_class(Functions)
        # Loads the module containing the predicates available to the user.
        self.__function_mapper.load_class(Predicates)
        # Saves an instance of the Evaluator class, which takes as parameters the references
        # to the Environment instance holding the global variables and to the FunctionMapper
        # instance holding the mappings between the names of the functions and predicates
        # available to the user and their implementations.
        self.__evaluator = Evaluator(self.__environment, self.__function_mapper)

    @property
    def evaluator(self):
        return self.__evaluator

    @evaluator.setter
    def evaluator(self, value):
        self.__evaluator = value

    def build(self, AST):
        # Saves a list to contain the facts.
        facts = []
        # Saves a list to contain the rules.
        rules = []
        for (item_type, item_content) in AST:
            # If the construct defines a global variable, it is evaluated
            # and the result is added to the set of global variables.
            if item_type == 'DEFGLOBAL_CONSTRUCT':
                for assignment in item_content:
                    print('Defining defglobal:', assignment.name)
                    assignment.content = self.__evaluator.evaluate(assignment.content)
                    self.__environment.set_global_variable(assignment)
            # If the construct defines a list of facts, the attributes of each fact
            # are evaluated and the fact is added to the list of facts.
            elif item_type == 'DEFFACTS_CONSTRUCT':
                print('Defining deffacts:', item_content[0])
                for fact in item_content[1:]:
                    for i in range(len(fact[1])):
                        # Evaluates each attribute of the considered fact.
                        fact[1][i] = self.__evaluator.evaluate(fact[1][i])
                    # Saves an instance of the considered fact,
                    # specifying its name and its attributes.
                    f = OrderedFact(fact[0], fact[1])
                    # Adds the current fact to the list of facts.
                    facts.append(f)
            # If the construct defines a rule, its left- and right-hand sides are
            # preliminarily evaluated and the rule is added to the list of rules.
            # Non-global variables are evaluated after the matching phase between
            # facts and rules.
            elif item_type == 'DEFRULE_CONSTRUCT':
                name = item_content[0][0]
                print('Defining defrule:', name)
                # Saves an instance of the considered rule.
                r = Rule(name)
                # If the rule declares a salience value,
                # it is stored in the instance of the rule.
                if len(item_content[0]) > 1:
                    if item_content[0][1][0] == 'salience':
                        r.salience = item_content[0][1][1].content
                # Saves the left-hand side of the rule.
                lhs = item_content[1]
                # Evaluates each element of the left-hand side of the rule.
                for i in range(len(lhs)):
                    # Checks whether the element of the left-hand side represents a test.
                    if isinstance(lhs[i][0], SpecialTestCallType):
                        # Builds the set of the variables contained in the test.
                        lhs[i][0].build()
                        # Builds, in the rule containing the test, a dictionary of the form
                        # variable_involved_in_the_test : set_of_tests_which_contain_it.
                        for v in lhs[i][0].test_variables:
                            if v not in r.variable_tests:
                                r.variable_tests[v] = {lhs[i][0]}
                            else:
                                r.variable_tests[v].add(lhs[i][0])
                        # Adds the test to the instance of the rule.
                        r.tests.add(lhs[i][0])
                    else:
                        # If the current element of the left-hand side
                        # of the rule is not a test, it is evaluated.
                        lhs[i] = self.__evaluator.evaluate(lhs[i])
                    # Adds the current element to the left-hand side of the rule instance.
                    r.lhs.append(lhs[i])
                # Evaluates the complexity of the rule.
                r.evaluate_complexity()
                # Saves the right-hand side of the rule.
                r.rhs = item_content[2]
                # Adds the rule to the list of rules.
                rules.append(r)
        # Returns the lists of identified facts and rules.
        return (facts, rules)

    def build_assert(self, ast):
        facts = []
        for fact in ast[1:]:
            print('Defining fact:', fact[0])
            for i in range(len(fact[1])):
                # Evaluates each attribute of the considered fact.
                fact[1][i] = self.__evaluator.evaluate(fact[1][i])
            # Saves an instance of the considered fact,
            # specifying its name and its attributes.
            f = OrderedFact(fact[0], fact[1])
            # Adds the considered fact to the Working Memory.
            facts.append(f)
        return facts

    def reset(self):
        self.__environment.clear_local_variables()
        self.__environment.clear_test_variables()
        self.__evaluator = Evaluator(self.__environment, self.__function_mapper)