def test_linear_sarsa(iterations=1000, mlambda=None, n0=100, avg_it=100):
    """Run TD-control Sarsa with linear function approximation.

    If ``mlambda`` is a list, a fresh environment/agent pair is trained for
    every lambda in it and the values returned by
    ``Agent.TD_control_linear`` are plotted against lambda.  Otherwise a
    single agent is trained (lambda defaults to 0.5 when ``mlambda`` is
    None) and its state-value function is displayed.

    Args:
        iterations: number of iterations handed to ``TD_control_linear``.
        mlambda: a single lambda value, a list of lambda values, or None.
        n0: constant handed to the ``Agent`` constructor.
        avg_it: third argument handed to ``TD_control_linear``.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("\n-------------------")
    print("TD control Sarsa, with Linear function approximation")
    print("run for n. iterations: " + str(iterations))
    print("plot graph mse vs episodes for lambda equal 0 and lambda equal 1")
    print("list (std output) win percentage for values of lambda 0, 0.1, 0.2, ..., 0.9, 1")

    # The context manager guarantees the pickle file handle is closed.
    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    with open("Data/Qval_func_1000000_MC_control.pkl", "rb") as q_file:
        monte_carlo_Q = pickle.load(q_file)
    # NOTE(review): n_elements is not used anywhere below; kept so the load
    # and shape access still fail early if the reference file is unusable.
    n_elements = monte_carlo_Q.shape[0] * monte_carlo_Q.shape[1] * 2

    if not isinstance(mlambda, list):
        # if no value is passed for lambda, default 0.5
        lam = 0.5 if mlambda is None else mlambda
        # learn
        game = Environment()
        agent = Agent(game, n0)
        agent.TD_control_linear(iterations, lam, avg_it)
        agent.show_statevalue_function()
    else:
        # test each value of lambda with a fresh environment and agent
        mse = []
        for lam in mlambda:
            game = Environment()
            agent = Agent(game, n0)
            mse.append(agent.TD_control_linear(iterations, lam, avg_it))
        plt.plot(mlambda, mse)
        plt.ylabel('mse')
        plt.show()
def __init__(self, gnet, opt, global_ep, global_ep_r, res_queue, name,
             agent_port, monitor_port):
    """Set up one worker process.

    Stores the shared network, optimizer, episode counter, reward holder
    and result queue, then creates this worker's own ``Environment`` (on
    the given ports) and its own local ``Net``.
    """
    super(Worker, self).__init__()
    self.name = 'w%i' % name

    # Handles shared with the other workers.
    self.g_ep = global_ep
    self.g_ep_r = global_ep_r
    self.res_queue = res_queue
    self.gnet = gnet
    self.opt = opt

    # Objects owned by this worker only.
    self.env = Environment(agent_port=agent_port,
                           monitor_port=monitor_port)
    self.lnet = Net(N_S, N_A)
def init_env(self):
    """Replace ``self.env`` with a freshly populated ``Environment``.

    Any previous environment is dropped first.  The new one receives
    ``param.NB_AGENTS`` agents added with the default argument, followed
    by ``param.NB_RANDOM_AGENTS`` agents added with ``False``.
    """
    if self.env is not None:
        del self.env
    self.env = Environment()

    for _ in range(param.NB_AGENTS):
        self.env.add_agent()
    for _ in range(param.NB_RANDOM_AGENTS):
        self.env.add_agent(False)
def run_sequence_ranges(self, params_ranges_list, nb_sequences):
    """Run the environment headlessly for every parameter configuration.

    ``Environment.build_list(params_ranges_list)`` yields
    ``(mass, charge, polarizability, dipole_moment, stiffness)`` tuples.
    Each tuple is simulated ``param.NB_OCCURRENCES`` times; every
    occurrence starts from a fresh environment and calls
    ``self.env.actualize`` ``nb_sequences`` times.

    ``self.nb_sequences`` is used as the countdown and is left at -1 on
    return.

    NOTE: a negative ``nb_sequences`` never reaches zero, so the inner
    loop does not terminate on its own (nothing here polls for events).

    The original carried a ``continuer`` flag that was set to 1 and never
    cleared; the unreachable ``break`` checks on it were removed.
    """
    for configuration in Environment.build_list(params_ranges_list):
        (mass, charge, polarizability, dipole_moment,
         stiffness) = configuration
        for _ in range(param.NB_OCCURRENCES):
            self.nb_sequences = nb_sequences
            self.init_env()
            while self.nb_sequences != 0:
                self.env.actualize(mass=mass,
                                   charge=charge,
                                   polarizability=polarizability,
                                   dipole_moment=dipole_moment,
                                   stiffness=stiffness)
                if self.nb_sequences > 0:
                    self.nb_sequences -= 1
    self.nb_sequences = -1
def test_monte_carlo(iterations=1000000, n0=100):
    """Train an agent with Monte Carlo control, then plot and store the result.

    Args:
        iterations: number of iterations handed to ``Agent.MC_control``.
        n0: constant handed to the ``Agent`` constructor.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3;
    # the original print statements are a syntax error on Python 3.
    print("\n-------------------")
    print("Monte Carlo control")
    print("run for n. iterations: " + str(iterations))
    print("win percentage: ")

    # learn
    game = Environment()
    agent = Agent(game, n0)
    agent.MC_control(iterations)

    # plot and store
    agent.show_statevalue_function()
    agent.store_Qvalue_function()
class Worker(mp.Process):
    """Training worker process with its own environment and local network.

    Each worker runs episodes in its private ``Environment``, collects
    states/actions/rewards, and at the end of every episode hands them to
    ``push_and_pull`` together with the local and global networks.
    """

    def __init__(self, gnet, opt, global_ep, global_ep_r, res_queue, name,
                 agent_port, monitor_port):
        """Store the shared handles and create the per-worker env and net."""
        super(Worker, self).__init__()
        self.name = 'w%i' % name
        self.g_ep, self.g_ep_r, self.res_queue = global_ep, global_ep_r, res_queue
        self.gnet, self.opt = gnet, opt
        self.env = Environment(agent_port=agent_port,
                               monitor_port=monitor_port)
        self.lnet = Net(N_S, N_A)
        # if is_gpu_available:
        #     self.lnet = self.lnet.cuda()

    def run(self):
        """Run episodes until the shared episode counter reaches MAX_EP.

        A ``None`` sentinel is always put on ``res_queue`` and the
        environment is always cleaned up, whichever way the loop ends.
        ``KeyboardInterrupt`` is swallowed (as before); any other exception
        now propagates *after* the sentinel and cleanup, instead of skipping
        them and leaving the queue consumer without its end marker.
        """
        try:
            while self.g_ep.value < MAX_EP:
                self._run_episode()
        except KeyboardInterrupt:
            # Ctrl-C ends the worker quietly; finalization happens below.
            pass
        finally:
            self.res_queue.put(None)
            self.env.cleanup()

    def _run_episode(self):
        """Play one episode and sync the local net with the global one."""
        s = self.env.reset()
        buffer_s, buffer_a, buffer_r = [], [], []
        ep_r = 0.

        # Periodic checkpoints of the global network.
        if self.g_ep.value % 20 == 19:
            torch.save(self.gnet, MODEL_NAME + ".pt")
        if self.g_ep.value % 2000 == 19:
            torch.save(
                self.gnet,
                MODEL_NAME + "_" + str(self.g_ep.value // 2000) + ".pt")

        for t in range(MAX_EP_STEP):
            a = self.lnet.choose_action(v_wrap(s[None, :]), t)
            # if is_gpu_available:
            #     a = self.lnet.choose_action(torch.from_numpy(s).float().cuda(), t)
            # else:
            #     a = self.lnet.choose_action(v_wrap(s[:]), t)
            s_, r, done, _ = self.env.step(a, self.g_ep.value)

            # The step budget ends the episode even if the env did not.
            if t == MAX_EP_STEP - 1:
                done = True

            if s is not None:
                ep_r += r
                buffer_a.append(a)
                buffer_s.append(s)
                buffer_r.append(r)

            if done:
                # Sync Global and Local Nets
                push_and_pull(self.opt, self.lnet, self.gnet, done, s_,
                              buffer_s, buffer_a, buffer_r, GAMMA)
                record(self.g_ep, self.g_ep_r, ep_r, self.res_queue,
                       self.name)
                break
            s = s_
def run_params(self, mass, charge, polarizability, dipole_moment,
               nb_sequences, stiffness=None):
    """Run the session with the indicated parameters.

    If ``Environment.get_probability_grid_config`` returns no data for
    these parameters, a headless ``run_sequence`` is executed first to
    create some.  The sliders are then set to the requested values and the
    interactive ``run`` loop is started for ``nb_sequences`` sequences.

    Args:
        mass, charge, polarizability, dipole_moment: particle parameters.
        nb_sequences: number of sequences to run.
        stiffness: optional stiffness; defaults to the current
            ``self.current_stiffness``.

    Fixes over the previous version:
    * ``run_sequence`` and ``run`` both need a stiffness value; it was
      missing, so the calls raised TypeError / ValueError.
    * the dipole-moment and polarizability sliders were assigned each
      other's value.
    """
    if stiffness is None:
        stiffness = self.current_stiffness

    data = Environment.get_probability_grid_config(
        mass, charge, polarizability, dipole_moment, nb_sequences)
    if data is None:
        self.run_sequence(mass, charge, polarizability, dipole_moment,
                          stiffness, nb_sequences)

    self.nb_sequences = nb_sequences

    # Mirror the requested values on the sliders.
    self.mass_scale.value = mass
    self.charge_scale.value = charge
    self.polarizability_scale.value = polarizability
    self.dipole_moment_scale.value = dipole_moment
    self.stiffness_scale.value = stiffness

    # run() unpacks (mass, charge, polarizability, dipole_moment, stiffness).
    self.run((mass, charge, polarizability, dipole_moment, stiffness))
os.environ["OMP_NUM_THREADS"] = "4" # Training Hyperparameters GAMMA = 1 MAX_EP = 40000 MAX_EP_STEP = 200 LEARNING_RATE = 0.0001 NUM_WORKERS = 5 # Model IO Parameters MODEL_NAME = "ho" LOAD_MODEL = False TEST_MODEL = False # Neural Network Architecture Variables ENV_DUMMY = Environment() N_S, N_A = ENV_DUMMY.state_dim, ENV_DUMMY.action_dim Z1 = 100 Z2 = 100 # Gpu use flag # is_gpu_available = torch.cuda.is_available() class Net(nn.Module): def __init__(self, s_dim, a_dim): super(Net, self).__init__() self.s_dim = s_dim self.a_dim = a_dim self.a1 = nn.Linear(s_dim, Z1) self.a2 = nn.Linear(Z1, Z2)
from Vehicle_Physics.position import EarthPosition
from Earth_Models.flat_earth import EulerFlatEarth
from Utility.Signal_Generator import Constant, Doublet
from simulator import Simulation
import matplotlib.pyplot as plt

# NOTE(review): Skyhawk, ISA1976, VerticalConstant, NoWind, Environment and
# steady_state_trim are not imported in the lines visible here - presumably
# imported earlier in the file; confirm.
aircraft = Skyhawk()

# The environment is assembled from an atmosphere, a gravity and a wind model.
atmosphere = ISA1976()
gravity = VerticalConstant()
wind = NoWind()
environment = Environment(atmosphere, gravity, wind)

# Initial condition handed to the trim routine.
pos = EarthPosition(x=0, y=0, height=10000)
psi = 0.5  # rad
TAS = 600  # m/s, about mach 2 @ 10,000

# Starting guess for the control deflections and throttle.
controls0 = {
    'delta_elevator': 0,
    'delta_aileron': 0,
    'delta_rudder': 0,
    'delta_t': 0.5
}

# Trim the aircraft at the condition above, then seed the flat-earth system
# with the trimmed state at time 0.
trimmed_state, trimmed_controls = steady_state_trim(aircraft, environment,
                                                    pos, psi, TAS, controls0)
system = EulerFlatEarth(time0=0, tot_state=trimmed_state)
"""
aircraft = Skyhawk()
"""
print(f"Aircraft mass: {aircraft.mass} kg")
print(f"Aircraft inertia tensor: \n {aircraft.inertia} kg/m²")
print(f"forces: {aircraft.forces} N")
print(f"moments: {aircraft.moments} N·m")
print(aircraft.controls)
print(aircraft.control_limits)
"""
"""[Environment set up and definition]
"""
# The environment is assembled from an atmosphere, a gravity and a wind model.
atmosphere = ISA1976()
gravity = VerticalConstant()
wind = NoWind()
environment = Environment(atmosphere, gravity, wind)

# Initial condition handed to the trim routine.
pos = EarthPosition(x=0, y=0, height=10000)
psi = 0.0  #rad
TAS = 600  #true airspeed m/s

# Starting guess for the control deflections and throttle.
controls_init = {
    'delta_elevator': 0,
    'delta_aileron': 0,
    'delta_rudder': 0,
    'delta_t': 0.0
}

# Trim the aircraft at the condition above and report the resulting state.
trimmed_state, trimmed_controls = steady_state_trim(aircraft, environment,
                                                    pos, psi, TAS,
                                                    controls_init)
print(f"Trimmed State:{trimmed_state}")
#print(f"Trimmed Controls: {trimmed_controls}")
from Environment.environment import Environment

if __name__ == "__main__":
    # Settings for this run, forwarded unchanged to Environment as keywords.
    settings = {
        "maxgenerations": 100,
        "size": 1000,
        "optimum": 100,
        "crossover_rate": 0.84,
        "mutation_rate": 0.20,
    }
    env = Environment(**settings)
    env.run()
from Environment.environment import Environment

if __name__ == "__main__":
    # Settings for this run, forwarded unchanged to Environment as keywords.
    settings = {
        "maxgenerations": 400,
        "size": 200,
        "optimum": 80,
    }
    env = Environment(**settings)
    env.run()
class Gui:
    """pygame/sgc front end driving an ``Environment`` particle simulation.

    The window shows the agents and objects of the environment plus a set
    of sliders (mass, charge, dipole moment, polarizability, stiffness,
    friction) and a Run/Pause button.  The class can also run parameter
    sweeps without event handling (``run_sequence*``) and plot the values
    collected in ``env.data_store`` with matplotlib.
    """

    def init_env(self):
        """Replace ``self.env`` with a freshly populated ``Environment``."""
        if self.env is not None:
            del self.env
        self.env = Environment()
        for _ in range(param.NB_AGENTS):
            self.env.add_agent()
        for _ in range(param.NB_RANDOM_AGENTS):
            self.env.add_agent(False)

    def __init__(self):
        """Create the window, the widgets and the initial parameter values."""
        # The environment itself is created in run().
        self.env = None

        # Initialize sliders (and actual starting values) here
        self.current_mass_value = param.STARTING_MASS_VALUE
        self.current_charge_value = param.STARTING_CHARGE_VALUE
        self.current_dipole_moment = param.STARTING_DIPOLE_MOMENT
        self.current_polarizability = param.STARTING_POLARIZABILITY
        self.current_stiffness = param.STARTING_STIFFNESS
        self.current_friction = param.STARTING_FRICTION
        self.sim_running = param.STARTING_SIM_RUNNING
        # Remaining sequences; a negative value means "no limit".
        self.nb_sequences = -1

        pygame.init()
        pygame.display.init()
        self.info = pygame.display.Info()
        # dw/dh are a third of the display; the window is 2*dw by 2*dh, so
        # (dw, dh) is the window centre.
        self.dw = int(self.info.current_w / 3)
        self.dh = int(self.info.current_h / 3)
        self.screen = sgc.surface.Screen((2 * self.dw, 2 * self.dh))
        self.fgColor = (0, 0, 0)
        self.bgColor = (255, 255, 255)

        btn = sgc.Button(label="Run/Pause", pos=(10, 440))
        btn.on_click = self.change_sim_state
        btn.add(5)

        self.mass_scale = self._make_scale(
            "Particle mass", 20, 1, 100, 99, 0)
        self.charge_scale = self._make_scale(
            "Particle charge", 90, 0, 100, 100, 1)
        self.dipole_moment_scale = self._make_scale(
            "Particle dipole moment", 160, 0, 100, 100, 2)
        self.polarizability_scale = self._make_scale(
            "Particle polarizability", 230, 0, 100000, 99999, 3)
        self.stiffness_scale = self._make_scale(
            "Stiffness", 300, 0, 100, 100, 4)
        self.friction_scale = self._make_scale(
            "Friction", 370, 0, 100, 100, 5)

        self.mass_scale.value = self.current_mass_value
        self.charge_scale.value = self.current_charge_value
        self.dipole_moment_scale.value = self.current_dipole_moment
        self.polarizability_scale.value = self.current_polarizability
        self.stiffness_scale.value = self.current_stiffness
        self.friction_scale.value = self.current_friction

        self.clock = pygame.time.Clock()

    def _make_scale(self, label, y, lo, hi, max_step, order):
        """Create one labelled slider at (10, y), register it and return it."""
        scale = sgc.Scale(label=label,
                          label_side="top",
                          label_col=self.fgColor,
                          pos=(10, y),
                          min=lo,
                          max=hi,
                          min_step=1,
                          max_step=max_step)
        scale.add(order)
        return scale

    def run_params(self, mass, charge, polarizability, dipole_moment,
                   nb_sequences, stiffness=None):
        """Run the session with the indicated parameters.

        If ``Environment.get_probability_grid_config`` returns no data for
        these parameters, a headless ``run_sequence`` is executed first to
        create some.  The sliders are then set to the requested values and
        the interactive ``run`` loop is started.

        ``stiffness`` is optional and defaults to ``self.current_stiffness``.
        Previously no stiffness was forwarded at all, so ``run_sequence``
        (six arguments) and ``run`` (five-value tuple) both raised, and the
        dipole-moment and polarizability sliders received each other's
        value.
        """
        if stiffness is None:
            stiffness = self.current_stiffness

        data = Environment.get_probability_grid_config(
            mass, charge, polarizability, dipole_moment, nb_sequences)
        if data is None:
            self.run_sequence(mass, charge, polarizability, dipole_moment,
                              stiffness, nb_sequences)

        self.nb_sequences = nb_sequences
        self.mass_scale.value = mass
        self.charge_scale.value = charge
        self.polarizability_scale.value = polarizability
        self.dipole_moment_scale.value = dipole_moment
        self.stiffness_scale.value = stiffness
        self.run((mass, charge, polarizability, dipole_moment, stiffness))

    def run_sequence(self, mass, charge, polarizability, dipole_moment,
                     stiffness, nb_sequences):
        """Run a headless sweep over exactly one parameter configuration."""
        ranges_list = (range(mass, mass + 1),
                       range(charge, charge + 1),
                       range(polarizability, polarizability + 1),
                       range(dipole_moment, dipole_moment + 1),
                       range(stiffness, stiffness + 1))
        self.run_sequence_ranges(ranges_list, nb_sequences)

    def run_sequence_ranges(self, params_ranges_list, nb_sequences):
        """Run the environment headlessly for every parameter configuration.

        Each configuration from ``Environment.build_list`` is simulated
        ``param.NB_OCCURRENCES`` times, each time on a fresh environment
        and for ``nb_sequences`` calls to ``env.actualize``.
        ``self.nb_sequences`` is left at -1 on return.

        NOTE: a negative ``nb_sequences`` never reaches zero, so the inner
        loop does not terminate on its own.  The never-cleared
        ``continuer`` flag of the original (and its unreachable breaks)
        was removed.
        """
        for configuration in Environment.build_list(params_ranges_list):
            (mass, charge, polarizability, dipole_moment,
             stiffness) = configuration
            for _ in range(param.NB_OCCURRENCES):
                self.nb_sequences = nb_sequences
                self.init_env()
                while self.nb_sequences != 0:
                    self.env.actualize(mass=mass,
                                       charge=charge,
                                       polarizability=polarizability,
                                       dipole_moment=dipole_moment,
                                       stiffness=stiffness)
                    if self.nb_sequences > 0:
                        self.nb_sequences -= 1
        self.nb_sequences = -1

    def run(self, params=None):
        """Interactive main loop.

        ``params``, when given, is a
        ``(mass, charge, polarizability, dipole_moment, stiffness)`` tuple
        that overrides the current values and disables slider changes.
        The loop runs until the user quits or ``self.nb_sequences`` reaches
        zero; pressing ``r`` starts again on a fresh environment.
        """
        if params is not None:
            (self.current_mass_value, self.current_charge_value,
             self.current_polarizability, self.current_dipole_moment,
             self.current_stiffness) = params

        restart = True
        while restart:
            # Reset the flag: if the inner loop does not execute at all
            # (nb_sequences already 0) the old value would otherwise
            # restart forever.
            restart = False
            self.init_env()
            continuer = 1
            self.env.data_store.clear()
            while continuer and (self.nb_sequences != 0):
                # probably better not to update values on each step
                # it will have to do for now !
                if self.sim_running:
                    self.env.actualize(
                        mass=self.current_mass_value,
                        charge=self.current_charge_value,
                        polarizability=self.current_polarizability,
                        dipole_moment=self.current_dipole_moment,
                        stiffness=self.current_stiffness,
                        friction=self.current_friction)
                    if self.nb_sequences > 0:
                        self.nb_sequences -= 1
                continuer, restart = self.pygame_event_managing(
                    params is None)
                self.pygame_display_managing()
            if param.DRAW_GRAPHS:
                self.plot_all()

    def plot_all(self):
        """Show three figures with the series stored in ``env.data_store``."""
        store = self.env.data_store

        plt.subplot(3, 2, 1)
        self.draw_dict("Temperature", store.temperature)
        plt.subplot(3, 2, 2)
        self.draw_dict("Volume", store.volume)
        plt.subplot(3, 2, 3)
        self.draw_dict("Pressure", store.pressure)
        plt.subplot(3, 2, 4)
        self.draw_dict("Entropy", store.entropy)
        plt.subplot(3, 2, 5)
        self.draw_dict_f_dict(store.pressure, store.temperature,
                              "pressure", "temperature")
        plt.subplot(3, 2, 6)
        self.draw_dict_f_dict(store.pressure, store.volume,
                              "pressure", "volume", show=True)

        plt.subplot(3, 2, 1)
        self.draw_dict("Pressure (borders)",
                       store.border_collision_range,
                       name_x="Time (" +
                       str(param.DELTA_TIME * param.RANGE_COLLISIONS_GRAPH) +
                       " s)")
        plt.subplot(3, 2, 2)
        self.draw_dict("Thermodynamic potential",
                       store.thermodynamic_potential)
        plt.subplot(3, 2, 3)
        self.draw_dict("Internal Energy", store.internal_energy)
        plt.subplot(3, 2, 4)
        self.draw_dict("Enthalpy", store.enthalpy)
        plt.subplot(3, 2, 5)
        self.draw_dict("Free Enthalpy", store.free_enthalpy)
        plt.subplot(3, 2, 6)
        self.draw_dict("Free Energy", store.free_energy, show=True)

        plt.subplot(3, 2, 1)
        self.draw_dict("Ecart-type 1", store.dist_standard_deviation)
        plt.subplot(3, 2, 2)
        self.draw_dict("Ecart-type min",
                       store.dist_standard_deviation_min,
                       show=True)

    def pygame_display_managing(self):
        """Redraw the window: particles, widgets and the optional box."""
        time = self.clock.tick()
        self.screen.fill(self.bgColor)

        pxarray = pygame.PixelArray(self.screen.image)
        for el in self.env.agent_list:
            self.draw_point(el.position, pxarray, el.get_color())
        for el in self.env.object_list:
            self.draw_point(el.position, pxarray)
        # Drop the pixel array before the widgets draw on the surface.
        del pxarray

        sgc.update(time)
        if param.BORDER_MODE != param.BorderMode.NONE:
            pygame.draw.rect(
                self.screen.image, self.fgColor,
                (self.dw - param.BOX_SIZE / 2, self.dh - param.BOX_SIZE / 2,
                 param.BOX_SIZE, param.BOX_SIZE), 1)
        pygame.display.flip()

    def pygame_event_managing(self, param_change_allowed):
        """Process pending events.

        Returns:
            ``(continuer, restart)``: ``continuer`` is 0 when the current
            run must stop (Escape, ``r`` or window close); ``restart`` is
            True when ``r`` was pressed.
        """
        continuer = 1
        restart = False
        # Walk through every event received since the last call.
        for event in pygame.event.get():
            sgc.event(event)
            if event.type == GUI:
                print(event)
            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_ESCAPE:
                    continuer = 0
                if event.key == pygame.K_r:
                    continuer = 0
                    restart = True
            elif event.type == QUIT:
                continuer = 0
            elif event.type == MOUSEBUTTONUP and param_change_allowed:
                # A slider may have moved: copy all slider values.
                self.current_mass_value = self.mass_scale.value
                self.current_charge_value = self.charge_scale.value
                self.current_dipole_moment = self.dipole_moment_scale.value
                self.current_polarizability = self.polarizability_scale.value
                self.current_stiffness = self.stiffness_scale.value
                self.current_friction = self.friction_scale.value
        return continuer, restart

    def change_sim_state(self):
        """Toggle between running and paused."""
        self.sim_running = not self.sim_running

    def draw_dict(self, name, dict, name_x=None, show=False):
        """Plot the values of a mapping against its keys.

        Nothing is drawn for an empty mapping.  ``name`` labels the y axis;
        ``name_x`` labels the x axis and defaults to a time label built
        from ``param.DELTA_TIME``.  (The parameter name ``dict`` shadows
        the builtin but is kept for keyword callers.)
        """
        if len(dict) != 0:
            # Materialize both views: matplotlib wants sequences.
            plt.plot(list(dict.keys()), list(dict.values()))
            #plt.title(name + " evolution")
            if name_x is None:
                plt.xlabel("Time (" + str(param.DELTA_TIME) + " s)")
            else:
                plt.xlabel(name_x)
            plt.ylabel(name)
            if show:
                plt.show()

    def draw_all(self, dict_list):
        """Plot every mapping of ``dict_list`` on one figure and show it."""
        for series in dict_list:
            plt.plot(list(series.keys()), list(series.values()))
        plt.title("All")
        plt.xlabel("Time (" + str(self.env.deltaTime) + " s)")
        plt.ylabel("All")
        plt.show()

    def draw_dict_f_dict(self, dict, dict2, name, name2, show=False):
        """Plot ``dict`` as a function of ``dict2`` (matched by key).

        ``dict2`` values go on the x axis (labelled ``name2``) and ``dict``
        values on the y axis (labelled ``name``).  The previous version
        built the mapping the other way round, so the data sat on the
        opposite axes from their labels.
        """
        result = {}
        for k in dict.keys():
            result[dict2[k]] = dict[k]
        self.draw_dict(name, result, name2, show=show)

    def draw_point(self, pos, pxarray, color=(0, 0, 0)):
        """Draw one particle as a filled square centred on ``pos``.

        ``pos`` is relative to the window centre.  Particles whose square
        would not fit entirely inside the window are skipped.
        """
        radius = param.PARTICULE_RADIUS
        (x, y) = pos
        x = int(x)
        y = int(y)
        # After shifting by (dw, dh) the square spans x - radius .. x + radius,
        # which must stay within 0 .. 2*dw - 1 (same for y).  The lower bound
        # used to be -dw - radius, which let negative pixel indices through.
        if not (-self.dw + radius <= x < self.dw - radius):
            return
        if not (-self.dh + radius <= y < self.dh - radius):
            return
        x += self.dw
        y += self.dh
        if param.DRAW_PERCEPTION_RADIUS:
            pygame.draw.circle(self.screen.image, self.fgColor, (x, y),
                               param.PERCEPTION_RADIUS, 1)
        for i in range(x - radius, x + radius + 1):
            for j in range(y - radius, y + radius + 1):
                pxarray[i, j] = color

    def __del__(self):
        """Shut pygame down when the GUI object is destroyed."""
        pygame.quit()