def reset(self, relaunch=False):
    """Start a new episode and return its first observation.

    On every call except the first one, the ``meta`` flag is set on the
    outgoing action and sent to the server. If ``relaunch`` is ``True`` the
    track is re-selected and the kill/start scripts are run in the container.
    A fresh client is then opened on ``self.port``.

    NOTE(review): the source had lost its indentation; the relaunch branch is
    assumed to be nested inside the "not the first reset" branch.

    Args:
        relaunch: When ``True``, restart TORCS before reconnecting.

    Returns:
        Whatever ``self.get_obs()`` returns for the new episode.
    """
    self.time_step = 0

    if not self.initial_reset:
        self.client.R.d['meta'] = True
        self.client.respond_to_server()

        if relaunch is True:
            self._set_track()
            for script in ("kill_torcs.sh", "start_torcs.sh"):
                self.container.exec_run(script, detach=True)

    # The old connection is replaced on every reset.
    self.client = snakeoil3.Client(p=self.port)
    self.client.MAX_STEPS = np.inf
    self.client.get_servers_input()

    raw_state = self.client.S.d
    self.observation = self._make_observaton(raw_state)

    self.last_u = None
    self.initial_reset = False
    return self.get_obs()
def reset(self, relaunch=False):
    """Start a new episode and return its first observation.

    Args:
        relaunch: When ``True`` (and this is not the first reset), TORCS is
            restarted through ``self.reset_torcs()``.

    Returns:
        Whatever ``self.get_obs()`` returns for the new episode.
    """
    self.time_step = 0

    if self.initial_reset is not True:
        self.client.R.d['meta'] = True
        self.client.respond_to_server()

        ## TENTATIVE. Restarting TORCS every episode suffers the memory leak bug!
        if relaunch is True:
            self.reset_torcs()
            print("### TORCS is RELAUNCHED ###")

    # Modify here if you use multiple tracks in the environment
    # Open new UDP in vtorcs
    self.client = snakeoil3.Client(p=3008, vision=self.vision)
    self.client.MAX_STEPS = np.inf

    # Pull the initial server message and turn it into an observation.
    self.client.get_servers_input()
    self.observation = self.make_observation(self.client.S.d)

    self.last_u = None
    self.initial_reset = False
    return self.get_obs()
def reset(self, client, relaunch=False):
    """Start a new episode on the port used by ``client``.

    The caller owns the client, so the replacement client created here is
    returned alongside the observation.

    Args:
        client: Currently connected snakeoil3 client; its ``port`` is reused.
        relaunch: When ``True`` (and this is not the first reset), TORCS is
            restarted through ``self.reset_torcs()``.

    Returns:
        A ``(observation, client)`` tuple, where ``observation`` is the
        result of ``self.get_obs()`` and ``client`` is the new connection.
    """
    port = client.port
    self.time_step = 0

    if self.initial_reset is not True:
        client.R.d['meta'] = True
        client.respond_to_server()

        ## TENTATIVE. Restarting TORCS every episode suffers the memory leak bug!
        if relaunch is True:
            self.reset_torcs()
            print("### TORCS is RELAUNCHED ###")

    # Modify here if you use multiple tracks in the environment
    client = snakeoil3.Client(p=port, vision=self.vision)  # Open new UDP in vtorcs
    client.MAX_STEPS = np.inf

    client.get_servers_input(0)  # Get the initial input from torcs
    self.observation = self.make_observation(client.S.d)

    # Flat state vector cached for consumers of ``self.currState``.
    observation = self.observation
    self.currState = np.hstack(
        (observation.angle, observation.track, observation.trackPos,
         observation.speedX, observation.speedY, observation.speedZ,
         observation.wheelSpinVel / 100.0, observation.rpm))

    self.last_u = None
    self.initial_reset = False
    return self.get_obs(), client
def __init__(self, id, port, vision=False, throttle=False, gear_change=False):
    """Launch TORCS and open a client for one car.

    Args:
        id: Reference to the car.
        port: Port on which the agent connects to the TORCS server.
        vision: Launch TORCS with ``-vision`` and add an image bound to the
            observation space.
        throttle: When not ``False`` the action space has two dimensions
            instead of one.
        gear_change: Stored on the instance; not otherwise used here.
    """
    self.vision = vision
    self.id = id
    self.port = port
    self.throttle = throttle
    self.gear_change = gear_change
    self.initial_run = True

    time.sleep(0.5)
    launch_command = ('torcs -nofuel -nodamage -nolaptime -vision &'
                      if self.vision is True
                      else 'torcs -nofuel -nodamage -nolaptime &')
    os.system(launch_command)
    time.sleep(0.5)
    os.system('sh autostart.sh')
    time.sleep(0.5)

    # Modify here if you use multiple tracks in the environment
    self.client = snakeoil3.Client(p=self.port, vision=self.vision)
    self.client.MAX_STEPS = np.inf

    # The initial get_servers_input() call is deliberately skipped: the
    # client gets stuck because the server waits for the other clients.
    self.obs = self.client.ServerState.data

    action_dims = 1 if throttle is False else 2
    self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(action_dims, ))

    upper = [1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf]
    lower = [0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf]
    if vision is not False:
        upper.append(255)
        lower.append(0)
    self.observation_space = spaces.Box(low=np.array(lower),
                                        high=np.array(upper))
def main():
    """Evaluate a model from the command line.

    Usage: ``python model.py render/norender path_to_mode.json [seed]``.
    With a model path the weights are loaded and the environment is built
    around a snakeoil3 client on port 3001; without one, random parameters
    are used. Runs 100 episodes, or a single episode in render mode.
    """
    assert len(
        sys.argv
    ) > 1, 'python model.py render/norender path_to_mode.json [seed]'

    argv = sys.argv
    render_mode = str(argv[1]) == "render"

    use_model = len(argv) > 2
    if use_model:
        filename = argv[2]
        print("filename", filename)

    # The random draw always happens, even when a seed is supplied.
    the_seed = np.random.randint(10000)
    if len(argv) > 3:
        the_seed = int(argv[3])
        print("seed", the_seed)

    if use_model:
        model = make_model()
        print('model size', model.param_count)
        client = snakeoil3.Client(p=3001, vision=False)  # Open new UDP in vtorcs
        client.MAX_STEPS = np.inf
        client.get_servers_input(0)
        model.make_env(client)
        model.load_model(filename)
    else:
        model = make_model(load_model=False)
        print('model size', model.param_count)
        model.make_env(render_mode=render_mode)
        model.init_random_model_params(stdev=np.random.rand() * 0.01)

    N_episode = 1 if render_mode else 100
    reward_list = []
    for _ in range(N_episode):
        reward, steps_taken = simulate(model,
                                       train_mode=False,
                                       render_mode=render_mode,
                                       num_episode=1)
        if render_mode:
            print("terminal reward", reward, "average steps taken",
                  np.mean(steps_taken) + 1)
        else:
            print(reward[0])
        reward_list.append(reward[0])

    if render_mode:
        return
    print("seed", the_seed, "average_reward", np.mean(reward_list), "stdev",
          np.std(reward_list))
def __init__(self, idx=0):
    """Create the per-agent state and connect to TORCS.

    Args:
        idx: Agent index; the client connects on port ``3101 + idx``.
    """
    self.idx = idx
    self.port = 3101 + self.idx

    # Problem dimensions.
    self.action_dim, self.state_dim = 3, 65

    self.obs = []
    self.client = snakeoil3.Client(p=self.port, vision=False)

    # Rolling step state.
    self.s_t = []
    self.r_t = 0
    self.done = 0
    self.action = [0, 1, 0]

    self.client.MAX_STEPS = np.inf
def reset(self, relaunch=False, sampletrack=False, render=False):
    """Reset the environment.

    NOTE(review): the source had lost its indentation. The configuration
    write and the 0.5 s pause are assumed to belong to the ``relaunch``
    branch, and the TORCS restart to the "not the first reset" branch.

    Args:
        relaunch: Relaunch the game. Needs to be called from time to time
            because of the memory leak.
        sampletrack: Sample a random track and load the game with it at the
            relaunch. Only takes effect when ``relaunch`` is true.
        render: If true the game is launched in "render" mode, otherwise in
            "results only" mode. Only takes effect when ``relaunch`` is true.

    Returns:
        Whatever ``self.get_obs()`` returns for the new episode.
    """
    self.time_step = 0

    if relaunch:
        # Both config edits are best-effort: a missing attribute is ignored.
        if sampletrack:
            try:
                sample_track(self.root)
            except AttributeError:
                pass
        try:
            set_render_mode(self.root, render=render)
        except AttributeError:
            pass
        self.tree.write(self.path)
        time.sleep(0.5)

    if self.initial_reset is not True:
        self.client.R.d['meta'] = True
        self.client.respond_to_server()

        ## TENTATIVE. Restarting TORCS every episode suffers the memory leak bug!
        if relaunch is True:
            self.reset_torcs()

    # Modify here if you use multiple tracks in the environment
    self.client = snakeoil3.Client(p=self.port, vision=False)  # Open new UDP in vtorcs
    self.client.MAX_STEPS = np.inf

    self.client.get_servers_input()  # Get the initial input from torcs
    self.client.MAX_STEPS = np.inf  # set a second time, as in the original

    raw_state = self.client.S.d  # Get the current full-observation from torcs
    self.observation = self.make_observaton(raw_state)
    self.place = int(raw_state["racePos"])

    self.initial_reset = False
    return self.get_obs()
def _evaluate_solutions(solutions):
    """Run ``worker`` on every decoded solution and collect result rows.

    Each row is ``[worker_id, jobidx, fitness, timesteps]``. The asserts
    check that the packet was addressed to this MPI rank.
    """
    results = []
    for solution in solutions:
        worker_id, jobidx, seed, train_mode, max_len, weights = solution
        assert (train_mode == 1 or train_mode == 0), str(train_mode)
        worker_id = int(worker_id)
        possible_error = "work_id = " + str(worker_id) + " rank = " + str(rank)
        assert worker_id == rank, possible_error
        jobidx = int(jobidx)
        seed = int(seed)
        fitness, timesteps = worker(weights, seed, train_mode, max_len)
        results.append([worker_id, jobidx, fitness, timesteps])
    return results


def slave():
    """Worker loop: receive solution packets, evaluate them, send results.

    The first packet is also used to derive this worker's TORCS port
    (``3001 + worker_id``) and to build the environment. After that the
    function evaluates and replies forever; it never returns.
    """
    packet = np.empty(SOLUTION_PACKET_SIZE, dtype=np.int32)
    comm.Recv(packet, source=0)
    assert (len(packet) == SOLUTION_PACKET_SIZE)
    solutions = decode_solution_packet(packet)

    # The worker id of the first solution selects the port for this process.
    worker_id, _jobidx, _seed, _train_mode, _max_len, _weights = solutions[0]
    port = 3001 + worker_id
    print(port)
    client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
    client.MAX_STEPS = np.inf
    client.get_servers_input(0)
    model.make_env(client)

    # Fresh receive buffer for all later packets.
    packet = np.empty(SOLUTION_PACKET_SIZE, dtype=np.int32)
    while True:
        result_packet = encode_result_packet(_evaluate_solutions(solutions))
        assert len(result_packet) == RESULT_PACKET_SIZE
        comm.Send(result_packet, dest=0)

        comm.Recv(packet, source=0)
        assert (len(packet) == SOLUTION_PACKET_SIZE)
        solutions = decode_solution_packet(packet)
def __init__(self, vision=False, throttle=False, gear_change=False):
    """Launch TORCS, connect on port 3101 and define the gym spaces.

    Args:
        vision: Launch TORCS with ``-vision`` and add an image bound to the
            observation space.
        throttle: When not ``False`` the action space has two dimensions
            instead of one.
        gear_change: Stored on the instance; not otherwise used here.
    """
    self.vision = vision
    self.throttle = throttle
    self.gear_change = gear_change
    self.initial_run = True

    time.sleep(0.5)
    launch_command = ('torcs -nofuel -nodamage -nolaptime -vision &'
                      if self.vision is True
                      else 'torcs -nofuel -nodamage -nolaptime &')
    os.system(launch_command)
    time.sleep(0.5)
    os.system('sh autostart.sh')
    time.sleep(0.5)

    # Modify here if you use multiple tracks in the environment
    self.client = snakeoil3.Client(p=3101, vision=self.vision)  # Open new UDP in vtorcs
    self.client.MAX_STEPS = np.inf

    self.client.get_servers_input()  # Get the initial input from torcs
    self.obs = self.client.ServerState.data  # current full-observation

    action_dims = 1 if throttle is False else 2
    self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(action_dims, ))

    upper = [1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf]
    lower = [0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf]
    if vision is not False:
        upper.append(255)
        lower.append(0)
    self.observation_space = spaces.Box(low=np.array(lower),
                                        high=np.array(upper))
def main(_):
    """Drive with the joystick client and record data into a new directory.

    Exits immediately if the target directory already exists. Otherwise it
    creates the directory, changes into it, drives for ``C.maxSteps`` steps
    while a background thread saves images, then writes the module-level
    ``labels`` to ``data.txt``.

    :param _: unused
    :return: does not return; ends with ``sys.exit()``
    """
    # init
    path = root_dir + '/' + save_dir
    print("save in {}, target_speed {}".format(path,FLAGS.target_speed))
    if os.path.exists(path):
        print('directory existed, run again')
        sys.exit()
    os.mkdir(root_dir + '/' + save_dir)
    os.chdir(path)

    pygame.joystick.init()
    joystick = pygame.joystick.Joystick(0)
    joystick.init()
    pygame.display.init()

    # variable
    C = snakeoil3_gym.Client(p=3001)  # 3001
    driver = driveClient(FLAGS.target_speed,C,joystick)
    start = time.time()
    save = myThread(joystick,path)  # save image
    save.start()

    # control fragment
    remaining = C.maxSteps
    while remaining > 0:
        C.get_servers_input()
        driver.drive()
        C.respond_to_server()
        remaining -= 1
    C.shutdown()
    print(time.time() - start)

    with open(path + '/' + 'data.txt', 'w') as f:
        f.writelines(str(l) + '\n' for l in labels)

    save.stop(True)
    sys.exit()
def reset(self, relaunch=False):
    """Start a new episode and return its first observation.

    The new client is given the existing TORCS process id and race
    configuration. The process id it reports back is stored on the
    environment afterwards.

    Args:
        relaunch: When ``True`` (and this is not the first reset), TORCS is
            restarted through ``self.reset_torcs()``.

    Returns:
        Whatever ``self.get_obs()`` returns for the new episode.
    """
    self.time_step = 0

    if self.initial_reset is not True:
        self.client.R.d['meta'] = True
        self.client.respond_to_server()

        ## TENTATIVE. Restarting TORCS every episode suffers the memory leak bug!
        if relaunch is True:
            self.reset_torcs()
            print("### TORCS is RELAUNCHED ###")

    # Modify here if you use multiple tracks in the environment
    ### dosssman: Pass existing process id and race config path
    client_options = dict(
        p=3101,
        vision=self.vision,
        process_id=self.torcs_process_id,
        race_config_path=self.race_config_path,
        race_speed=self.race_speed,
        rendering=self.rendering,
        lap_limiter=self.lap_limiter,
        damage=self.damage,
    )
    self.client = snakeoil3.Client(**client_options)  # Open new UDP in vtorcs
    self.client.MAX_STEPS = np.inf

    self.client.get_servers_input()  # Get the initial input from torcs
    self.observation = self.make_observaton(self.client.S.d)

    self.last_u = None
    self.initial_reset = False

    # The newly created TORCS PID is also reattached to the Gym Torcs Env.
    # This should be temporary ... but only time knows
    self.torcs_process_id = self.client.torcs_process_id
    return self.get_obs()
R['gear']=2 if S['speedX']>80: R['gear']=3 if S['speedX']>110: R['gear']=4 if S['speedX']>140: R['gear']=5 if S['speedX']>170: R['gear']=6 return if __name__ == "__main__": C= snakeoil3_gym.Client(p=3101) for step in range(C.maxSteps,0,-1): C.get_servers_input() mydrive(C) C.respond_to_server() C.shutdown() # #!/usr/bin/python # import snakeoil3_gym # if __name__ == "__main__": # Cs= [ snakeoil3_gym.Client(p=P) for P in [3101,3102,3103,3104] ] # for step in range(Cs[0].maxSteps,0,-1): # for C in Cs: # C.get_servers_input() # mydrive(C)
def playGame(f_diagnostics, train_indicator, port=3101):  # 1 means Train, 0 means simply Run
    """Run DDPG episodes against either Simstar or TORCS.

    NOTE(review): the source had lost its indentation; nesting was
    reconstructed from the if/else pairing. The periodic save
    (every 20th episode) is assumed to sit inside the
    "save_indicator and train_indicator" guard. Confirm.

    Args:
        f_diagnostics: Unused here (only referenced by a commented-out call).
        train_indicator: Truthy to train (epsilon decay, replay buffer,
            saving); falsy to only run the policy.
        port: TORCS port for the snakeoil3 client (TORCS mode only).
    """
    def _state_of(observation):
        # Flat state vector fed to the agent.
        return np.hstack((observation.angle, observation.track,
                          observation.trackPos, observation.speedX,
                          observation.speedY, observation.opponents))

    action_dim = 3  # Steering/Acceleration/Brake
    state_dim = 23+18  # Number of sensors input; 23 for simstar
    env_name = 'Simstar_Env' if USE_SIMSTAR else 'Torcs_Env'
    agent = DDPG(env_name, state_dim, action_dim)

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    if USE_SIMSTAR:
        env = SimstarEnv(synronized_mode=True, speed_up=4, hz=5,
                         add_agent=True, agent_set_speed=0, agent_rel_pos=35,
                         autopilot_agent=OTHER_AGENT_AUTOPILOT)
        ob = env.reset()
    else:
        env = TorcsEnv(vision=False, throttle=True, gear_change=False)
        client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
        client.MAX_STEPS = np.inf
        client.get_servers_input(0)  # Get the initial input from torcs
        obs = client.S.d  # Get the current full-observation from torcs
        ob = env.make_observation(obs)

    s_t = _state_of(ob)
    state_other_vehicle = _state_of(ob)

    EXPLORE = total_explore
    episode_count = max_eps
    max_steps = max_steps_eps
    epsilon = epsilon_start
    done = False
    epsilon_steady_state = 0.01  # This is used for early stopping.

    totalSteps = 0
    best_reward = -100000
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):
        save_indicator = 1
        early_stop = 1
        # Counting the total reward and total steps in the current episode
        total_reward = 0.
        info = {'termination_cause':0}
        distance_traversed = 0.
        speed_array = []
        trackPos_array = []
        print('\n\nStarting new episode...\n')

        for step in range(max_steps):
            if train_indicator:
                epsilon -= 1.0 / EXPLORE
                epsilon = max(epsilon, epsilon_steady_state)
                a_t = agent.noise_action(s_t, epsilon)
                # NOTE(review): the noisy action is immediately replaced by
                # the deterministic one, as in the original. Confirm intent.
                a_t = agent.action(s_t)
            else:
                a_t = agent.action(s_t)

            # a_t is of the form: [steer, accel, brake]
            if USE_SIMSTAR:
                if not OTHER_AGENT_AUTOPILOT:
                    # control second vehicle
                    obs_other = env.get_agent_obs()
                    state_other_vehicle = _state_of(obs_other)
                    other_vehicle_action = agent.action(state_other_vehicle)
                    env.set_agent_action(other_vehicle_action)
                ob, r_t, done, info = env.step(a_t)
            else:
                ob, r_t, done, info = env.step(step, client, a_t, early_stop)

            if done:
                break

            analyse_info(info, printing=False)
            s_t1 = _state_of(ob)
            forward_speed = ob.speedX*np.cos(ob.angle)
            distance_traversed += forward_speed  # Assuming 1 step = 1 second
            speed_array.append(ob.speedX*np.cos(ob.angle))
            trackPos_array.append(ob.trackPos)

            # Checking for nan rewards
            if math.isnan(r_t):
                r_t = 0.0
                for _ in range(50):
                    print( 'Bad Reward Found' )
                break  # Introduced by Anirban

            # Add to replay buffer only if training
            if train_indicator:
                agent.perceive(s_t, a_t, r_t, s_t1, done)

            total_reward += r_t
            s_t = s_t1

            # Displaying progress every 15 steps.
            if np.mod(step, 15) == 0:
                print("Episode", i, "Step", step, "Epsilon", epsilon , "Action", a_t, "Reward", r_t )

            totalSteps += 1
            if done:
                break

        # Saving the best model.
        if save_indicator == 1 and train_indicator == 1:
            if total_reward >= best_reward:
                print("Now we save model with reward " + str(total_reward) + " previous best reward was " + str(best_reward))
                best_reward = total_reward
                agent.saveNetwork()
            if np.mod(i, 20) == 0:
                print("***************************************************************************************************************************")
                agent.saveNetwork()

        running_avg_reward = running_average(running_avg_reward, i+1, total_reward)

        print("TOTAL REWARD @ " + str(i) +"-th Episode  : Num_Steps= " + str(step) + "; Max_steps= " + str(max_steps) +"; Reward= " + str(total_reward) +"; Running average reward= " + str(running_avg_reward))
        print("Total Step: " + str(totalSteps))
        print("")
        print(info)

        if USE_SIMSTAR:
            ob = env.reset()
        elif 'termination_cause' in info.keys() and info['termination_cause']=='hardReset':
            print('Hard reset by some agent')
            ob, client = env.reset(client=client)
        else:
            ob, client = env.reset(client=client, relaunch=True)

        s_t = _state_of(ob)
        # document_episode(i, distance_traversed, speed_array, trackPos_array, info, running_avg_reward, f_diagnostics)

    env.end()  # Shut down TORCS
    print("Finish.")
def playGame(f_diagnostics, train_indicator, agent, port=3101):  # 1 means Train, 0 means simply Run
    """Run episodes with ``agent`` on TORCS, reconnecting on any failure.

    Changes relative to the original:
      * The per-port analysis file is opened with ``with``, and the trailing
        ``f1.close()`` is gone. It raised ``UnboundLocalError`` whenever no
        analysis file had been opened (for example when not training).
      * The reconnect loops catch ``(Exception, SystemExit)`` instead of a
        bare ``except:``, so Ctrl-C can interrupt an endless retry.
        ``SystemExit`` is still retried in case the client exits on a failed
        connection.
      * The three copies of the reconnect loop share one local helper, and
        locals that were never read have been removed.

    NOTE(review): the source had lost its indentation. The ``continue`` after
    the in-step reconnect is assumed to skip the rest of that step.

    Args:
        f_diagnostics: Unused in this function.
        train_indicator: ``1`` to train (noisy actions, replay buffer,
            periodic saves); anything else to only run the policy.
        agent: Object providing ``noise_action``, ``action``, ``perceive``
            and ``saveNetwork``.
        port: TORCS port for the snakeoil3 client.
    """
    def _state_of(observation):
        # Flat state vector fed to the agent.
        return np.hstack((observation.angle, observation.track,
                          observation.trackPos, observation.speedX,
                          observation.speedY, observation.speedZ,
                          observation.wheelSpinVel/100.0, observation.rpm,
                          observation.opponents))

    def _connect(failure_message=None):
        # Retry until a client is connected and yields an observation.
        while True:
            try:
                new_client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
                new_client.MAX_STEPS = np.inf
                new_client.get_servers_input(0)  # Get the initial input from torcs
                new_ob = env.make_observation(new_client.S.d)
            except (Exception, SystemExit):
                if failure_message is not None:
                    print(failure_message)
                continue
            if new_ob is not None:
                return new_client, new_ob

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    env = TorcsEnv(vision=False, throttle=True, gear_change=False, main=1)

    client, ob = _connect()
    s_t = _state_of(ob)

    EXPLORE = total_explore
    episode_count = max_eps
    max_steps = max_steps_eps
    epsilon = epsilon_start
    done = False
    epsilon_steady_state = 0.01  # This is used for early stopping.

    totalSteps = 0
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):
        early_stop = 1
        total_reward = 0.
        info = {'termination_cause': 0}
        distance_traversed = 0.
        print('\n\nStarting new episode...\n')
        print("Initial memory consumption: ")

        for step in range(max_steps):
            # Take noisy actions during training
            if train_indicator == 1:
                epsilon -= 1.0 / EXPLORE
                epsilon = max(epsilon, epsilon_steady_state)
                a_t = agent.noise_action(s_t, epsilon)
            else:
                a_t = agent.action(s_t)

            try:
                ob, r_t, done, info = env.step(step, client, a_t, early_stop)
                if done:
                    break
                analyse_info(info, printing=False)
                s_t1 = _state_of(ob)
                distance_traversed += ob.speedX * np.cos(ob.angle)  # Assuming 1 step = 1 second

                if math.isnan(r_t):
                    r_t = 0.0
                    for _ in range(50):
                        print('Bad Reward Found')
                    break  # Introduced by Anirban

                # Add to replay buffer only if training
                if train_indicator:
                    agent.perceive(s_t, a_t, r_t, s_t1, done)
            except Exception as e:
                print("Exception caught at port " + str(i) + str(e))
                client, ob = _connect()
                continue

            total_reward += r_t
            s_t = s_t1

            # Displaying progress every 15 steps.
            if np.mod(step, 15) == 0:
                print("Episode", i, "Step", step, "Epsilon", epsilon,
                      "Action", a_t, "Reward", r_t)

            totalSteps += 1
            if done:
                break

        running_avg_reward = running_average(running_avg_reward, i + 1,
                                             total_reward)

        if train_indicator == 1:
            # Save network after every 20 episodes
            if np.mod(i, 20) == 0:
                agent.saveNetwork(i)
            # Saving training data for client for analysis
            if np.mod(i, 5) == 0:
                with open(str(port) + ".csv", "a+") as f1:
                    client.printAnalysis(f1, i)

        print("TOTAL REWARD @ " + str(i) +"-th Episode  : Num_Steps= " + str(step) + "; Max_steps= " \
            + str(max_steps) +"; Reward= " + str(total_reward) + \
            "; Running average reward= " + str(running_avg_reward))
        print("Total Step: " + str(totalSteps))
        print("")
        print(info)

        try:
            if 'termination_cause' in info.keys(
            ) and info['termination_cause'] == 'hardReset':
                print('Hard reset by some agent')
            # Both outcomes relaunch TORCS, as in the original.
            ob, client = env.reset(client=client, relaunch=True)
        except Exception as e:
            print("Exception caught at point B at port " + str(i) + str(e))
            client, ob = _connect(
                "Exception caught at at point C at port " + str(i) + str(e))

        s_t = _state_of(ob)

    env.end()  # This is for shutting down TORCS
    print("Finish.")
def __init__(self, vision=False, throttle=False, gear_change=False, display=False):
    """Kill any running TORCS, launch a new one and connect on port 3001.

    NOTE(review): the source had lost its indentation. The final 0.5 s pause
    is assumed to run after every launch variant.

    Args:
        vision: Launch TORCS with ``-vision`` and add an image bound to the
            observation space.
        throttle: When not ``False`` the action space has two dimensions
            instead of one.
        gear_change: Stored on the instance; not otherwise used here.
        display: Without vision, ``True`` launches the GUI and runs
            ``autostart.sh``; otherwise TORCS is started with ``-r`` on
            ``race_config.xml`` located next to the running script.
    """
    self.vision = vision
    self.throttle = throttle
    self.gear_change = gear_change
    self.display = display
    self.initial_run = True

    print("launch torcs")
    os.system('pkill torcs')
    time.sleep(0.5)

    cur_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
    if self.vision is True:
        os.system('torcs -nofuel -nodamage -nolaptime -vision &')
    elif self.display is True:
        os.system('torcs -nofuel -nolaptime &')
        time.sleep(0.5)
        os.system('sh autostart.sh')
    else:
        os.system('torcs -r ' + cur_dir + '/race_config.xml -nofuel -nolaptime &')
    time.sleep(0.5)

    # Modify here if you use multiple tracks in the environment
    self.client = snakeoil3.Client(
        p=3001, vision=self.vision, display=self.display)  # Open new UDP in vtorcs
    self.client.MAX_STEPS = np.inf

    self.client.get_servers_input()  # Get the initial input from torcs
    obs = self.client.S.d  # Get the current full-observation from torcs

    action_dims = 1 if throttle is False else 2
    self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(action_dims, ))

    upper = [1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf]
    lower = [0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf]
    if vision is not False:
        upper.append(255)
        lower.append(0)
    self.observation_space = spaces.Box(low=np.array(lower),
                                        high=np.array(upper))

    # Motion history along both axes, all starting at zero.
    for axis in ("X", "Y"):
        for quantity in ("Speed", "Acc", "Jerk", "Snap"):
            setattr(self, "prev" + quantity + axis, 0)
    self.past_t = 0
    self.past_d = 0
    self.KPH_to_MPS = 0.277778
def playGame(f_diagnostics, train_indicator, port=3101):  # 1 means Train, 0 means simply Run
    """Template episode loop that always drives straight at full throttle.

    The action is hard-coded to ``[steer=0, accel=1, brake=0]``; replace
    ``a_t`` with the output of any other algorithm.

    NOTE(review): ``agent`` is referenced in the save branch but is not
    defined in this function. The branch is currently unreachable because
    ``save_indicator`` is always 0.

    Args:
        f_diagnostics: Unused here (only referenced by a commented-out call).
        train_indicator: Only consulted by the unreachable save branch.
        port: TORCS port for the snakeoil3 client.
    """
    def _state_of(observation):
        # Flat state vector built from the observation.
        return np.hstack((observation.angle, observation.track,
                          observation.trackPos, observation.speedX,
                          observation.speedY, observation.speedZ,
                          observation.wheelSpinVel / 100.0, observation.rpm))

    action_dim = 3  # Steering/Acceleration/Brake
    state_dim = 29  # Number of sensors input
    env_name = 'Torcs_Env'

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    env = TorcsEnv(vision=False, throttle=True, gear_change=False)

    client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
    client.MAX_STEPS = np.inf
    client.get_servers_input(0)  # Get the initial input from torcs
    ob = env.make_observation(client.S.d)

    episode_count = max_eps
    max_steps = max_steps_eps
    epsilon = epsilon_start
    done = False

    totalSteps = 0
    best_reward = -100000
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):
        save_indicator = 0  # 1 to save the learned weights, 0 otherwise
        early_stop = 1
        total_reward = 0.
        info = {'termination_cause': 0}
        distance_traversed = 0.
        speed_array = []
        trackPos_array = []
        print('\n\nStarting new episode...\n')

        for step in range(max_steps):
            # Hard-coded steer=0, accel=1 and brake=0
            a_t = np.asarray([0.0, 1.0, 0.0])  # [steer, accel, brake]
            ob, r_t, done, info = env.step(step, client, a_t, early_stop)
            if done:
                break

            analyse_info(info, printing=False)
            s_t1 = _state_of(ob)
            distance_traversed += ob.speedX * np.cos(ob.angle)  # Assuming 1 step = 1 second
            speed_array.append(ob.speedX * np.cos(ob.angle))
            trackPos_array.append(ob.trackPos)

            # Checking for nan rewards
            if math.isnan(r_t):
                r_t = 0.0
                for _ in range(50):
                    print("Bad Reward Found")
                break  # Introduced by Anirban

            total_reward += r_t
            s_t = s_t1

            # Displaying progress every 15 steps.
            if np.mod(step, 15) == 0:
                print("Episode", i, "Step", step, "Epsilon", epsilon,
                      "Action", a_t, "Reward", r_t)

            totalSteps += 1
            if done:
                break

        # Saving the best model.
        if save_indicator == 1 and train_indicator == 1:
            if total_reward >= best_reward:
                print("Now we save model with reward " + str(total_reward) +
                      " previous best reward was " + str(best_reward))
                best_reward = total_reward
                agent.saveNetwork()

        running_avg_reward = running_average(running_avg_reward, i + 1,
                                             total_reward)

        print("TOTAL REWARD @ " + str(i) + "-th Episode  : Num_Steps= " +
              str(step) + "; Max_steps= " + str(max_steps) + "; Reward= " +
              str(total_reward) + "; Running average reward= " +
              str(running_avg_reward))
        print("Total Step: " + str(totalSteps))
        print("")
        print(info)

        hard_reset = ('termination_cause' in info.keys()
                      and info['termination_cause'] == 'hardReset')
        if hard_reset:
            print('\n\n***Hard reset by some agent***\n\n')
            ob, client = env.reset(client=client)
        else:
            ob, client = env.reset(client=client, relaunch=True)

        s_t = _state_of(ob)
        ## uncomment this to get some statistics per episode like total distance traversed, average speed, distance from center of track, etc
        # document_episode(i, distance_traversed, speed_array, trackPos_array, info, running_avg_reward, f_diagnostics)

    env.end()  # Shut down TORCS
    print("Finish.")
def master():
    """Master loop of the evolution-strategy training; never returns.

    Each generation it asks the solver for solutions, sends them to the
    workers, feeds the rewards back, and rewrites the JSON files for the
    current parameters and the history. Every ``eval_steps`` generations
    the current parameters are evaluated, and the evaluation log, best
    parameters and best-history files are rewritten.
    """
    def _trunc2(value):
        # Truncate (not round) to two decimals, as the original did inline.
        return int(value * 100) / 100.

    start_time = int(time.time())
    sprint("training", gamename)
    sprint("population", es.popsize)
    sprint("num_worker", num_worker)
    sprint("num_worker_trial", num_worker_trial)
    sys.stdout.flush()

    seeder = Seeder(seed_start)

    filename = filebase + '.json'
    filename_log = filebase + '.log.json'
    filename_hist = filebase + '.hist.json'
    filename_hist_best = filebase + '.hist_best.json'
    filename_best = filebase + '.best.json'

    port = 3001
    client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
    client.MAX_STEPS = np.inf
    client.get_servers_input(0)
    model.make_env(client)
    port = port + 1

    t = 0
    history = []
    history_best = []  # stores evaluation averages every 25 steps or so
    eval_log = []
    best_reward_eval = 0
    best_model_params_eval = None
    max_len = -1  # max time steps (-1 means ignore)

    while True:
        t += 1
        solutions = es.ask()

        if antithetic:
            half_batch = seeder.next_batch(int(es.popsize / 2))
            seeds = half_batch + half_batch
        else:
            seeds = seeder.next_batch(es.popsize)

        packet_list = encode_solution_packets(seeds, solutions, max_len=max_len)
        send_packets_to_slaves(packet_list)
        reward_list_total = receive_packets_from_slaves()

        reward_list = reward_list_total[:, 0]  # rewards
        time_steps = reward_list_total[:, 1]
        mean_time_step = _trunc2(np.mean(time_steps))
        max_time_step = _trunc2(np.max(time_steps))
        avg_reward = _trunc2(np.mean(reward_list))
        std_reward = _trunc2(np.std(reward_list))

        es.tell(reward_list)

        es_solution = es.result()
        model_params = es_solution[0]  # best historical solution
        reward = es_solution[1]  # best reward
        curr_reward = es_solution[2]  # best of the current batch
        model.set_model_params(np.array(model_params).round(4))

        r_max = _trunc2(np.max(reward_list))
        r_min = _trunc2(np.min(reward_list))

        curr_time = int(time.time()) - start_time
        h = (t, curr_time, avg_reward, r_min, r_max, std_reward,
             int(es.rms_stdev() * 100000) / 100000., mean_time_step + 1.,
             int(max_time_step) + 1)

        max_len = 2 * int(mean_time_step + 1.0) if cap_time_mode else -1

        history.append(h)

        with open(filename, 'wt') as out:
            json.dump([np.array(es.current_param()).round(4).tolist()],
                      out, sort_keys=True, indent=2, separators=(',', ': '))

        with open(filename_hist, 'wt') as out:
            json.dump(history, out, sort_keys=False, indent=0,
                      separators=(',', ':'))

        sprint(gamename, h)

        if t == 1:
            best_reward_eval = avg_reward

        if t % eval_steps != 0:
            continue

        # evaluate on actual task at hand
        prev_best_reward_eval = best_reward_eval
        model_params_quantized = np.array(es.current_param()).round(4)
        reward_eval = evaluate_batch(model_params_quantized, max_len=-1)
        model_params_quantized = model_params_quantized.tolist()
        improvement = reward_eval - best_reward_eval
        eval_log.append([t, reward_eval, model_params_quantized])
        with open(filename_log, 'wt') as out:
            json.dump(eval_log, out)

        if len(eval_log) == 1 or reward_eval > best_reward_eval:
            best_reward_eval = reward_eval
            best_model_params_eval = model_params_quantized
        elif retrain_mode:
            sprint("reset to previous best params, where best_reward_eval =",
                   best_reward_eval)
            es.set_mu(best_model_params_eval)

        with open(filename_best, 'wt') as out:
            json.dump([best_model_params_eval, best_reward_eval], out,
                      sort_keys=True, indent=0, separators=(',', ': '))

        # dump history of best
        curr_time = int(time.time()) - start_time
        best_record = [
            t, curr_time, "improvement", improvement, "curr", reward_eval,
            "prev", prev_best_reward_eval, "best", best_reward_eval
        ]
        history_best.append(best_record)
        with open(filename_hist_best, 'wt') as out:
            json.dump(history_best, out, sort_keys=False, indent=0,
                      separators=(',', ':'))

        sprint("Eval", t, curr_time, "improvement", improvement, "curr",
               reward_eval, "prev", prev_best_reward_eval, "best",
               best_reward_eval)
def playGame(f_diagnostics, train_indicator, agent, port=3101):  # 1 means Train, 0 means simply Run
    """Drive with ``snakeoil3.drive_example`` and reconnect on any failure.

    Changes relative to the original:
      * The reconnect loops catch ``(Exception, SystemExit)`` instead of a
        bare ``except:``, so Ctrl-C can interrupt an endless retry.
        ``SystemExit`` is still retried in case the client exits on a failed
        connection.
      * The three copies of the reconnect loop share one local helper, and
        locals that were never read have been removed.

    NOTE(review): ``done`` and ``info`` are never updated inside the step
    loop, so every episode runs for ``max_steps`` steps. Kept as in the
    original. The source had also lost its indentation; the ``continue``
    after the in-step reconnect is assumed to skip to the next step.

    Args:
        f_diagnostics: Unused in this function.
        train_indicator: Unused in this function.
        agent: Unused in this function.
        port: TORCS port for the snakeoil3 client.
    """
    def _connect(failure_message=None):
        # Retry until a client is connected and yields an observation.
        while True:
            try:
                new_client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
                new_client.MAX_STEPS = np.inf
                new_client.get_servers_input(0)  # Get the initial input from torcs
                new_ob = env.make_observation(new_client.S.d)
            except (Exception, SystemExit):
                if failure_message is not None:
                    print(failure_message)
                continue
            if new_ob is not None:
                return new_client, new_ob

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    env = TorcsEnv(vision=False, throttle=True, gear_change=False, main=1)

    client, ob = _connect()

    episode_count = max_eps
    max_steps = max_steps_eps
    done = False

    print("TORCS Experiment Start.")
    for i in range(episode_count):
        info = {'termination_cause': 0}
        print('\n\nStarting new episode...\n')
        print("Initial memory consumption: ")

        for step in range(max_steps):
            try:
                client.get_servers_input(step)
                snakeoil3.drive_example(client)
                client.respond_to_server()
            except Exception as e:
                print("Exception caught at port " + str(i) + str(e))
                client, ob = _connect()
                continue
            if done:
                break

        print(info)
        try:
            if 'termination_cause' in info.keys(
            ) and info['termination_cause'] == 'hardReset':
                print('Hard reset by some agent')
            # Both outcomes relaunch TORCS, as in the original.
            ob, client = env.reset(client=client, relaunch=True)
        except Exception as e:
            print("Exception caught at point B at port " + str(i) + str(e))
            client, ob = _connect(
                "Exception caught at at point C at port " + str(i) + str(e))

    env.end()  # This is for shutting down TORCS
    print("Finish.")
MAX_TRIALS = 200 # just use this to extract one trial. render_mode = False # for debugging. parser = argparse.ArgumentParser( description=('Train policy on OpenAI Gym environment ' 'using pepg, ses, openes, ga, cma')) parser.add_argument('--port', type=int, default=1, help='port') args = parser.parse_args() DIR_NAME = 'record' if not os.path.exists(DIR_NAME): os.makedirs(DIR_NAME) port = 3001 + args.port client = snakeoil3.Client(p=port, vision=False) # Open new UDP in vtorcs client.MAX_STEPS = np.inf if port < 3006: time.sleep(5) client.get_servers_input(0) # Get the initial input from torcs model = make_model(load_model=False) total_frames = 0 model.make_env(client, render_mode=render_mode, full_episode=True) #obs = client.S.d # Get the current full-observation from torcs #ob = env.make_observation(obs) for trial in range(MAX_TRIALS): # 200 trials per worker try:
def mydrive(c,vel,ster,fi,ke):
    """Write steering, throttle and gear commands for one simulation step.

    The commands are written in place into ``c.R.d``; sensor values are read
    from ``c.S.d``.

    Parameters:
        c: client object exposing the sensor dict ``c.S.d`` and the command
            dict ``c.R.d``.
        vel: target speed compared against ``S['speedX']``.
        ster: steering command used verbatim when ``ke > 6``.
        fi: step index inside the current plan; the planner inputs are only
            produced (and returned) when ``fi == 9``.
        ke: replanning counter; ``ke <= 6`` selects the built-in
            angle/trackPos steering, ``ke == 9`` enables the wheel-spin
            throttle cut.

    Returns:
        ``None`` unless ``fi == 9``; in that case the tuple
        ``(x0, y0, x_ob, y_ob, theta_in, na, mela, ob)`` where ``ob`` is 1
        when an opponent reading below 30 was found (and ``x_ob``/``y_ob``
        hold its estimated position) and 0 otherwise.
    """
    import math  # function-scope import: cos/sin/pi were not guaranteed in scope

    S, R = c.S.d, c.R.d
    target_speed = vel

    if ke <= 6:
        # Built-in steering: turn towards the track axis, then correct for
        # the lateral offset.
        R['steer'] = S['angle'] * 10 / snakeoil3_gym.PI
        R['steer'] -= S['trackPos'] * .10
        print('ke', vel)
    else:
        R['steer'] = ster
    #print(S['xcod'],S['ycod'])

    # Throttle: creep towards the target speed, lowered while steering hard.
    if S['speedX'] < target_speed - (R['steer'] * 50):
        R['accel'] += .01
    else:
        R['accel'] -= .01
    if S['speedX'] < 10:
        #R['accel']+= 1/(S['speedX']+.1)
        R['accel'] += 0.02

    if ke == 9:
        # Cut throttle when wheels 2+3 spin noticeably faster than wheels 0+1.
        spin = S['wheelSpinVel']
        if (spin[2] + spin[3]) - (spin[0] + spin[1]) > 5:
            R['accel'] -= .2

    # Speed-based gear ladder; thresholds ascend, so the last match wins.
    R['gear'] = 1
    for min_speed, gear in ((50, 2), (80, 3), (110, 4), (140, 5), (170, 6)):
        if S['speedX'] > min_speed:
            R['gear'] = gear

    if fi != 9:
        return

    print(S['xcod'], S['ycod'], 0, 0, S['angle'], 10, 0)
    x0 = S['xcod']
    y0 = S['ycod']
    x_ob = 0
    y_ob = 0
    theta_in = S['angle']
    na = 10
    mela = 0
    ob = 0

    # The original test here referenced an unbound name (`item`) and
    # multiplied the whole sensor list; pick the nearest reading below 30.
    nearest_idx, nearest_dist = None, 30
    for idx, dist in enumerate(S['opponents']):
        if dist < nearest_dist:
            nearest_idx, nearest_dist = idx, dist
    if nearest_idx is not None:
        # NOTE(review): keeps the original bearing formula (sensor i points at
        # i*10 degrees, in world axes) -- confirm against the opponent sensor
        # layout and whether the car heading must be added.
        bearing = nearest_idx * 10 * math.pi / 180
        x_ob = x0 + nearest_dist * math.cos(bearing)
        y_ob = y0 + nearest_dist * math.sin(bearing)
        ob = 1
    return x0, y0, x_ob, y_ob, theta_in, na, mela, ob


if __name__ == "__main__":
    i = 0
    if i == 0:
        # Initial plan from MATLAB; it writes vec.csv / woc.csv read below.
        # NOTE(review): this call passes 7 arguments, the replanning call
        # further down passes 8 -- confirm launch_iros' signature.
        eng = matlab.engine.start_matlab()
        eng.launch_iros(85.3066,19.0348,0,0,0,10,1)
        eng.quit()
    velo = genfromtxt('vec.csv', delimiter=',')
    ster = genfromtxt('woc.csv', delimiter=',')
    ct = 0
    C = snakeoil3_gym.Client(p=3101)
    for step in range(C.maxSteps,0,-1):
        C.get_servers_input()
        if (i==9):
            # Last step of the current plan: also collect the planner inputs.
            x0,y0,x_ob,y_ob,theta_in,na,mela,ob=mydrive(C,int(velo[i]),int(ster[i]),i,ct)
        else:
            mydrive(C,int(velo[i]),int(ster[i]),i,ct)
        i+=1
        if (i==10):
            # Hold 'p' while MATLAB replans, then reload the new profiles.
            keyboard.press('p')
            eng = matlab.engine.start_matlab()
            # NOTE(review): y0 and x0 are passed in swapped order relative to
            # the tuple mydrive returns -- confirm this is intentional.
            eng.launch_iros(y0,x0,x_ob,y_ob,theta_in,na,mela,ob)
            eng.quit()
            velo = genfromtxt('vec.csv', delimiter=',')
            ster = genfromtxt('woc.csv', delimiter=',')
            i = 0
            ct+=1
            keyboard.release('p')
        C.respond_to_server()
        if ct == 10:
            break
    C.shutdown()

# #!/usr/bin/python
# import snakeoil3_gym
# if __name__ == "__main__":
#     Cs= [ snakeoil3_gym.Client(p=P) for P in [3101,3102,3103,3104] ]
#     for step in range(Cs[0].maxSteps,0,-1):
#         for C in Cs:
#             C.get_servers_input()
#             mydrive(C)
#             C.respond_to_server()
#     else:
#         for C in Cs: C.shutdown()
for worker in workers: # worker threads #client = snakeoil3.Client(p=3101+i, vision=False) # Open new UDP in vtorcs #client.MAX_STEPS = np.inf #i+=1 t = threading.Thread(target=worker.work, args=()) t.start() threads.append(t) # add a PPO updating thread threads.append(threading.Thread(target=GLOBAL_PPO.update, )) threads[-1].start() COORD.join(threads) #save_path = saver.save(sess, "./model.ckpt") else: # plot reward change and testing #plt.plot(np.arange(len(GLOBAL_RUNNING_R)), GLOBAL_RUNNING_R) #plt.xlabel('Episode'); plt.ylabel('Moving reward'); plt.ion(); plt.show() #env.set_fps(30) while True: client = snakeoil3.Client(p=3101, vision=False) ob, client = env.reset(client) for t in range(4000): s = np.hstack( (ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm, ob.opponents)) a = GLOBAL_PPO.choose_action(s) #a[1] = abs(a[1]) #a[2] = abs(a[2]) #env.render() ob, r, done, info = env.step(t, client, a) print("reward at current step " + str(r))
def __init__(self, wid):
    """Store the worker id, bind the global PPO object and open a client.

    Parameters:
        wid: worker index; it also selects the UDP port (3101 + wid) that
            this worker's snakeoil3 client connects to.
    """
    self.wid = wid
    # Keep a handle on the module-level GLOBAL_PPO object.
    self.ppo = GLOBAL_PPO
    #self.i = i
    worker_port = 3101 + self.wid
    self.client = snakeoil3.Client(p=worker_port, vision=False)
def work(self):
    """Run episodes on this worker's client and queue rollouts for the PPO.

    Each episode resets the environment, then steps it for up to ``EP_LEN``
    steps with actions from ``self.ppo.choose_action`` plus ``OU.function``
    noise scaled by ``max(ETA, 0)``. Transitions are buffered and, at the
    last step or once ``GLOBAL_UPDATE_COUNTER`` reaches ``MIN_BATCH_SIZE``,
    pushed to ``QUEUE`` as ``[states | actions | discounted returns]``.
    If ``env.reset`` or ``env.step`` raises, the client is reopened via
    ``_reconnect`` and the loop carries on. The loop ends when
    ``COORD.should_stop()`` becomes true.
    """
    global GLOBAL_EP, GLOBAL_RUNNING_R, GLOBAL_UPDATE_COUNTER
    best = 0
    # Bound up front so the per-episode summary cannot hit an unset name
    # when the step loop does not run.
    ETA = 0.0
    self.client.MAX_STEPS = np.inf
    self.client.get_servers_input(0)  # Get the initial input from torcs
    # Build an observation from the current full sensor dict.
    ob = env.make_observation(self.client.S.d)

    while not COORD.should_stop():
        try:
            ob, self.client = env.reset(self.client)
        except Exception:
            print("Exception caught in reset " +
                  str(traceback.format_exc()))
            ob = self._reconnect("reset")

        ep_r = 0
        buffer_s, buffer_a, buffer_r = [], [], []
        for t in range(EP_LEN):
            if not ROLLING_EVENT.is_set():  # while global PPO is updating
                ROLLING_EVENT.wait()  # wait until PPO is updated
                # clear history buffer
                buffer_s, buffer_a, buffer_r = [], [], []

            s = self._state_vector(ob)
            a = self.ppo.choose_action(s)

            # Noise scale: positive during the first half of training
            # (GLOBAL_EP < EP_MAX / 2), clamped to zero afterwards.
            ETA = (1.0 - 2 * float(GLOBAL_EP / EP_MAX))
            # if self.wid == 0:
            #     ETA = -1.0
            noise_scale = max(ETA, 0)
            a[0] += noise_scale * OU.function(a[0], 0.0, 0.8, 0.40)
            a[1] += noise_scale * OU.function(a[1], 0.5, 0.80, 0.10)
            a[2] += noise_scale * OU.function(a[2], -0.1, 1.00, 0.05)

            try:
                ob, r, done, info = env.step(t, self.client, a)
            except Exception:
                print("Exception caught in step " +
                      str(traceback.format_exc()))
                ob = self._reconnect("step")
                # The failed step produced no transition; move on.
                continue

            print("Episode: " + str(GLOBAL_EP) + " Step: " + str(t) +
                  " Action: " + str(a) + "Reward: " + str(r))
            s_ = self._state_vector(ob)
            buffer_s.append(s)
            buffer_a.append(a)
            buffer_r.append(r)
            ep_r += r

            GLOBAL_UPDATE_COUNTER += 1  # count to minimum batch size
            if t == EP_LEN - 1 or GLOBAL_UPDATE_COUNTER >= MIN_BATCH_SIZE:
                # Bootstrap from the value of the latest state and walk the
                # rewards backwards to get discounted returns.
                v_s_ = self.ppo.get_v(s_)
                discounted_r = []
                for reward in reversed(buffer_r):
                    v_s_ = reward + GAMMA * v_s_
                    discounted_r.append(v_s_)
                discounted_r.reverse()

                bs = np.vstack(buffer_s)
                ba = np.vstack(buffer_a)
                br = np.array(discounted_r)[:, np.newaxis]
                buffer_s, buffer_a, buffer_r = [], [], []
                QUEUE.put(np.hstack((bs, ba, br)))

                if GLOBAL_UPDATE_COUNTER >= MIN_BATCH_SIZE:
                    ROLLING_EVENT.clear()  # stop collecting data
                    UPDATE_EVENT.set()  # globalPPO update

                if GLOBAL_EP >= EP_MAX:  # stop training
                    COORD.request_stop()
                    break
            if done:
                break

        # Checkpoint every 10th episode index when it beats this worker's best.
        if GLOBAL_EP % 10 == 0 and ep_r > best:
            best = ep_r
            ckpt_path = os.path.join('./weights_new/' + '%i' % GLOBAL_EP,
                                     'PPO.ckpt')
            GLOBAL_PPO.saver.save(GLOBAL_PPO.sess, ckpt_path,
                                  write_meta_graph=False)

        # record reward changes, plot later
        if self.wid == 0:
            Episode_reward.append(ep_r)
            np.savetxt('episode_reward.txt', Episode_reward)
        if len(GLOBAL_RUNNING_R) == 0:
            GLOBAL_RUNNING_R.append(ep_r)
        else:
            GLOBAL_RUNNING_R.append(GLOBAL_RUNNING_R[-1] * 0.9 + ep_r * 0.1)
        GLOBAL_EP += 1
        print('{0:.1f}%'.format(GLOBAL_EP / EP_MAX * 100),
              '|W%i' % self.wid,
              '|Ep_r: %.2f' % ep_r,
              '\t |Best Ep_r: %.2f' % best,
              '\t |Epsilon: %.4f' % ETA)

def _reconnect(self, context):
    """Reopen this worker's client until a first observation is obtained.

    ``context`` names the call that failed ("reset" or "step") and is only
    used in the log messages. Retries indefinitely, logging each failure.
    Returns the observation built from the new client's sensor dict.
    """
    while True:
        try:
            # Open new UDP in vtorcs
            self.client = snakeoil3.Client(p=3101 + self.wid, vision=False)
            self.client.MAX_STEPS = np.inf
            self.client.get_servers_input(0)  # Get the initial input from torcs
            new_ob = env.make_observation(self.client.S.d)
        except Exception:
            print("Exception caught in " + context + "'s exception " +
                  str(traceback.format_exc()))
        else:
            print("Reconnected to TORCS after " + context + " failure")
            return new_ob

def _state_vector(self, ob):
    """Flatten the observation fields the policy consumes into one array."""
    return np.hstack(
        (ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ,
         ob.wheelSpinVel / 100.0, ob.rpm, ob.opponents))
def _torcs_state(ob):
    """Flatten the observation fields used as the agent's state into one array."""
    return np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                      ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))


def playGame(f_diagnostics, train_indicator, port=3101):    # 1 means Train, 0 means simply Run
    """Run (and optionally train) a DDPG agent against a TORCS server.

    Parameters:
        f_diagnostics: not used by the active code; only the commented-out
            ``document_episode`` call at the end of each episode refers to it.
        train_indicator: truthy to take noisy actions, decay epsilon and feed
            the replay buffer; falsy to just run the current policy.
        port: UDP port passed to ``snakeoil3.Client``.

    Episode count, step limit, exploration horizon and initial epsilon come
    from the module-level ``max_eps``, ``max_steps_eps``, ``total_explore``
    and ``epsilon_start``.

    All ``print`` calls take a single argument and the loops use ``range`` so
    the function runs under both Python 2 and Python 3.
    """
    action_dim = 3  #Steering/Acceleration/Brake
    state_dim = 29  #of sensors input
    env_name = 'Torcs_Env'
    agent = DDPG(env_name, state_dim, action_dim)

    # Generate a Torcs environment
    print("I have been asked to use port: " + str(port))
    env = TorcsEnv(vision=False, throttle=True, gear_change=False)

    client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
    client.MAX_STEPS = np.inf
    client.get_servers_input(0)  # Get the initial input from torcs

    obs = client.S.d  # Get the current full-observation from torcs
    ob = env.make_observation(obs)
    s_t = _torcs_state(ob)

    EXPLORE = total_explore
    episode_count = max_eps
    max_steps = max_steps_eps
    epsilon = epsilon_start
    done = False
    epsilon_steady_state = 0.01  # This is used for early stopping.

    totalSteps = 0
    best_reward = -100000
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):
        # Saving is switched off: the best-model branch below only runs when
        # this flag is 1.
        save_indicator = 0

        # env.reset(client=client, relaunch=True)

        # random_number = random.random()
        # eps_early = max(epsilon,epsilon_steady_state) #At least 0.01
        # if (random_number < (1.0-eps_early)) and (train_indicator == 1): #During training, at most 99% of the time, early stopping would be engaged
        #     early_stop = 1
        # else:
        #     early_stop = 0
        early_stop = 1
        # print("Episode : " + str(i) + " Replay Buffer " + str(agent.replay_buffer.count()) + ' Early Stopping: ' + str(early_stop) + ' Epsilon: ' + str(eps_early) + ' RN: ' + str(random_number) )

        # Counting the total reward and total steps in the current episode
        total_reward = 0.
        info = {'termination_cause':0}
        distance_traversed = 0.
        speed_array = []
        trackPos_array = []
        # Keeps the summary print valid even when max_steps is 0.
        step = 0
        print('\n\nStarting new episode...\n')

        for step in range(max_steps):
            # Take noisy actions during training
            if (train_indicator):
                epsilon -= 1.0 / EXPLORE
                epsilon = max(epsilon, epsilon_steady_state)
                a_t = agent.noise_action(s_t, epsilon)
            else:
                a_t = agent.action(s_t)

            # a_t = np.asarray([0.0, 1.0, 0.0]) # [steer, accel, brake]
            ob, r_t, done, info = env.step(step, client, a_t, early_stop)
            # NOTE(review): leaving here means the terminal transition is
            # never passed to agent.perceive -- confirm this is intended.
            if done:
                break
            analyse_info(info, printing=False)

            s_t1 = _torcs_state(ob)
            forward_speed = ob.speedX*np.cos(ob.angle)
            distance_traversed += forward_speed  #Assuming 1 step = 1 second
            speed_array.append(forward_speed)
            trackPos_array.append(ob.trackPos)

            #Checking for nan rewards: TODO: This was actually below the following block
            if (math.isnan(r_t)):
                r_t = 0.0
                for bad_r in range(50):
                    print('Bad Reward Found')
                break  #Introduced by Anirban

            # Add to replay buffer only if training
            if (train_indicator):
                agent.perceive(s_t, a_t, r_t, s_t1, done)  # Add experience to replay buffer

            total_reward += r_t
            s_t = s_t1

            # Displaying progress every 15 steps.
            if np.mod(step, 15) == 0:
                print(" ".join(str(part) for part in (
                    "Episode", i, "Step", step, "Epsilon", epsilon,
                    "Action", a_t, "Reward", r_t)))

            totalSteps += 1
            # Unreachable while the early exit above is in place; kept as
            # the loop's regular termination check.
            if done:
                break

        # Saving the best model.
        if ((save_indicator==1) and (train_indicator==1)):
            if (total_reward >= best_reward):
                print("Now we save model with reward " + str(total_reward) + " previous best reward was " + str(best_reward))
                best_reward = total_reward
                agent.saveNetwork()

        running_avg_reward = running_average(running_avg_reward, i+1, total_reward)

        print("TOTAL REWARD @ " + str(i) +"-th Episode  :  Num_Steps= " + str(step) + "; Max_steps= " + str(max_steps) +"; Reward= " + str(total_reward) +"; Running average reward= " + str(running_avg_reward))
        print("Total Step: " + str(totalSteps))
        print("")

        print(info)
        # After a hard reset the environment is reset without relaunching
        # TORCS; every other ending relaunches it.
        if 'termination_cause' in info.keys() and info['termination_cause']=='hardReset':
            print('Hard reset by some agent')
            ob, client = env.reset(client=client)
        else:
            ob, client = env.reset(client=client, relaunch=True)

        s_t = _torcs_state(ob)

        # document_episode(i, distance_traversed, speed_array, trackPos_array, info, running_avg_reward, f_diagnostics)

    env.end()  # This is for shutting down TORCS
    print("Finish.")
#------------------------------------------------------------------------------------------------------------------# if __name__ == "__main__": i=0 if i==0: eng = matlab.engine.start_matlab() eng.launch_iros(249.44,174.5,240,173.5,0,10,0,1) eng.quit() velo = genfromtxt('vec.csv', delimiter=',') ster = genfromtxt('woc.csv', delimiter=',') ct =0 Cs= [ snakeoil3_gym.Client(p=P) for P in [3101,3102] ] for step in range(Cs[0].maxSteps,0,-1): Cs[0].get_servers_input() Cs[1].get_servers_input() if (i==9): x0,y0,x_ob,y_ob,theta_in,na,mela,ob=mydrive(Cs[0],int(velo[i]),int(ster[i]),i,ct) else: mydrive(Cs[0],int(velo[i]),int(ster[i]),i,ct) i+=1 if (i==10): keyboard.press('p') eng = matlab.engine.start_matlab() eng.launch_iros(x0,y0,x_ob,y_ob,theta_in,na,mela,ob) eng.quit() velo = genfromtxt('vec.csv', delimiter=',')