def playGame(f_diagnostics, train_indicator, agent, port=3101):  # 1 means Train, 0 means simply Run
    """Train or run `agent` against a TORCS instance listening on `port`.

    train_indicator == 1: take noisy actions, feed the replay buffer, and
    checkpoint the network every 20 episodes; == 0: act greedily.

    Relies on module-level globals defined elsewhere in this file:
    TorcsEnv, snakeoil3, total_explore, max_eps, max_steps_eps,
    epsilon_start, running_average, analyse_info.
    """
    action_dim = 3   # Steering/Acceleration/Brake
    state_dim = 65   # number of sensor inputs (includes opponent sensors)
    env_name = 'Torcs_Env'
    save_location = "./weights/"

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    env = TorcsEnv(vision=False, throttle=True, gear_change=False, main=1)

    # Best-effort connect loop: TORCS may not be up yet, so retry until the
    # UDP handshake and first observation succeed.
    ob = None
    while ob is None:
        try:
            client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
            client.MAX_STEPS = np.inf
            client.get_servers_input(0)  # Get the initial input from torcs
            obs = client.S.d  # Get the current full-observation from torcs
            ob = env.make_observation(obs)
            s_t = np.hstack((ob.angle, ob.track, ob.trackPos,
                             ob.speedX, ob.speedY, ob.speedZ,
                             ob.wheelSpinVel / 100.0, ob.rpm, ob.opponents))
        except Exception:
            pass  # deliberate best-effort retry (was a bare except)

    EXPLORE = total_explore
    episode_count = max_eps
    max_steps = max_steps_eps
    epsilon = epsilon_start
    done = False
    epsilon_steady_state = 0.01  # floor for exploration noise

    totalSteps = 0
    best_reward = -100000
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):
        save_indicator = 0
        early_stop = 1
        total_reward = 0.
        info = {'termination_cause': 0}
        distance_traversed = 0.
        speed_array = []
        trackPos_array = []
        print('\n\nStarting new episode...\n')
        print("Initial memory consumption: ")

        for step in range(max_steps):
            # Take noisy actions during training
            if (train_indicator == 1):
                epsilon -= 1.0 / EXPLORE
                epsilon = max(epsilon, epsilon_steady_state)
                a_t = agent.noise_action(s_t, epsilon)
            else:
                a_t = agent.action(s_t)
            try:
                ob, r_t, done, info = env.step(step, client, a_t, early_stop)
                if done:
                    break
                analyse_info(info, printing=False)
                s_t1 = np.hstack((ob.angle, ob.track, ob.trackPos,
                                  ob.speedX, ob.speedY, ob.speedZ,
                                  ob.wheelSpinVel / 100.0, ob.rpm, ob.opponents))
                distance_traversed += ob.speedX * np.cos(ob.angle)  # Assuming 1 step = 1 second
                # A NaN reward aborts the episode loudly (50 repeated prints).
                if (math.isnan(r_t)):
                    r_t = 0.0
                    for bad_r in range(50):
                        print('Bad Reward Found')
                    break  # Introduced by Anirban
                # Add to replay buffer only if training
                if (train_indicator):
                    agent.perceive(s_t, a_t, r_t, s_t1, done)
            except Exception as e:
                # BUG FIX: message previously printed the episode index labelled
                # as the port; report the actual port and the episode.
                print("Exception caught at port " + str(port) +
                      " in episode " + str(i) + ": " + str(e))
                # Reconnect loop, same best-effort pattern as startup.
                ob = None
                while ob is None:
                    try:
                        client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
                        client.MAX_STEPS = np.inf
                        client.get_servers_input(0)  # Get the initial input from torcs
                        obs = client.S.d  # Get the current full-observation from torcs
                        ob = env.make_observation(obs)
                    except Exception:
                        pass  # deliberate best-effort retry
                continue
            total_reward += r_t
            s_t = s_t1
            # Displaying progress every 15 steps.
            if ((np.mod(step, 15) == 0)):
                print("Episode", i, "Step", step, "Epsilon", epsilon,
                      "Action", a_t, "Reward", r_t)
            totalSteps += 1
            if done:
                break

        running_avg_reward = running_average(running_avg_reward, i + 1, total_reward)
        if train_indicator == 1:
            # Save network after every 20 episodes and store the data
            if np.mod(i, 20) == 0:
                agent.saveNetwork(i)
        # Saving training data for client for analysis.
        # BUG FIX: use a context manager; previously the file handle `f1` was
        # closed here AND again unconditionally after env.end(), where it could
        # also be unbound (NameError when train_indicator != 1).
        if train_indicator == 1 and np.mod(i, 5) == 0:
            with open(str(port) + ".csv", "a+") as f1:
                client.printAnalysis(f1, i)
        print("TOTAL REWARD @ " + str(i) + "-th Episode : Num_Steps= " + str(step) +
              "; Max_steps= " + str(max_steps) + "; Reward= " + str(total_reward) +
              "; Running average reward= " + str(running_avg_reward))
        print("Total Step: " + str(totalSteps))
        print("")
        print(info)
        try:
            # NOTE: both branches relaunch TORCS; they differ only in the log line.
            if 'termination_cause' in info.keys() and info['termination_cause'] == 'hardReset':
                print('Hard reset by some agent')
                ob, client = env.reset(client=client, relaunch=True)
            else:
                ob, client = env.reset(client=client, relaunch=True)
        except Exception as e:
            print("Exception caught at point B at port " + str(port) +
                  " in episode " + str(i) + ": " + str(e))
            ob = None
            while ob is None:
                try:
                    client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
                    client.MAX_STEPS = np.inf
                    client.get_servers_input(0)  # Get the initial input from torcs
                    obs = client.S.d  # Get the current full-observation from torcs
                    ob = env.make_observation(obs)
                except Exception:
                    print("Exception caught at point C at port " + str(port) + ": " + str(e))
        s_t = np.hstack((ob.angle, ob.track, ob.trackPos,
                         ob.speedX, ob.speedY, ob.speedZ,
                         ob.wheelSpinVel / 100.0, ob.rpm, ob.opponents))

    env.end()  # This is for shutting down TORCS
    # (removed trailing f1.close(): f1 is scoped to the with-block above)
    print("Finish.")
class World(object):
    """Thin multi-agent wrapper around one shared TorcsEnv.

    Owns `n` Agent instances (each with its own snakeoil client) and exposes
    per-agent reward/observation/done accessors for an external training loop.
    """

    def __init__(self):
        # Number of cooperative agents sharing the track.
        self.n = 4
        self.env = TorcsEnv(vision=False, throttle=True, gear_change=False)
        # Spaces of a single agent; all agents share the same interface.
        self.observation_space = self.env.observation_space
        self.action_space = self.env.action_space
        self.step_count = 0
        self.agent_list = []
        self.initialize_agents()

    def initialize_agents(self):
        """(Re)build the agent pool, then seed each agent's initial state.

        Two phases on purpose: every client must exist before any of them is
        polled for its first sensor packet.
        """
        self.agent_list = [Agent(idx=k) for k in range(self.n)]
        # (Hook for competitive agents, indexed from self.n upward, went here.)
        for member in self.agent_list:
            member.s_t = self.get_initial_observation(member, 0)

    def reset_agents(self):
        """Flag every agent's TORCS client to restart on its next exchange."""
        for member in self.agent_list:
            member.client.R.d['meta'] = True

    def get_initial_observation(self, agent, step_count=0):
        """Fetch the first sensor packet for `agent`; cache and return its state vector."""
        agent.client.get_servers_input(step_count)
        raw = agent.client.S.d
        ob = self.env.make_observation(raw)
        agent.s_t = np.hstack(
            (ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ,
             ob.wheelSpinVel / 100.0, ob.rpm, ob.opponents))
        return agent.s_t

    def update_agent_state(self, agent):
        """Apply `agent.action` in the environment and record reward/state/done on the agent."""
        print("agent port " + str(agent.port) + " " + "action is " +
              str(agent.action))
        ob, r_t, done, info = self.env.step(self.step_count, agent.client,
                                            agent.action, early_stop=0)
        agent.done = done
        agent.r_t = r_t
        agent.s_t = np.hstack(
            (ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ,
             ob.wheelSpinVel / 100.0, ob.rpm, ob.opponents))

    def reset_world(self):
        """Restart TORCS and rebuild the agent pool from scratch."""
        self.reset_agents()
        self.env.reset_torcs()
        self.initialize_agents()

    def stepWorld(self):
        """Advance every agent by one environment step."""
        for member in self.agent_list:
            self.update_agent_state(member)

    def reward(self, agent):
        return agent.r_t

    def observation(self, agent):
        return agent.s_t

    def done(self, agent):
        return agent.done
def playGame(f_diagnostics, train_indicator, agent, port=3101):  # 1 means Train, 0 means simply Run
    """Scripted-driver baseline: connects to TORCS on `port` and drives with
    snakeoil3.drive_example instead of the learned agent.

    NOTE(review): `agent` and `train_indicator` are accepted but never used in
    this variant.  The step loop never updates `done`/`info`, so the
    `if done: break` below is dead and the hardReset check always sees the
    initial info dict — presumably copied from the learning variant; confirm.
    """
    action_dim = 3  #Steering/Acceleration/Brake
    state_dim = 65  #of sensors input
    env_name = 'Torcs_Env'
    save_location = "./weights/"

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    env = TorcsEnv(vision=False, throttle=True, gear_change=False, main=1)
    # Best-effort connect loop: retry until TORCS answers on the UDP port.
    ob = None
    while ob is None:
        try:
            client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
            client.MAX_STEPS = np.inf
            client.get_servers_input(0)  # Get the initial input from torcs
            obs = client.S.d  # Get the current full-observation from torcs
            ob = env.make_observation(obs)
        except:
            pass  # deliberate best-effort: server may not be up yet
    # Episode bookkeeping; these names are module-level globals set elsewhere.
    EXPLORE = total_explore
    episode_count = max_eps
    max_steps = max_steps_eps
    epsilon = epsilon_start
    done = False
    epsilon_steady_state = 0.01  # This is used for early stopping.

    totalSteps = 0
    best_reward = -100000
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):
        save_indicator = 0
        early_stop = 1
        total_reward = 0.
        info = {'termination_cause': 0}
        distance_traversed = 0.
        speed_array = []
        trackPos_array = []
        print('\n\nStarting new episode...\n')
        print("Initial memory consumption: ")
        for step in range(max_steps):
            # Take noisy actions during training
            try:
                # One scripted control step: read sensors, compute the
                # scripted action, send the reply back to the server.
                client.get_servers_input(step)
                snakeoil3.drive_example(client)
                client.respond_to_server()
            except Exception as e:
                # NOTE(review): str(i) is the episode index, not the port.
                print("Exception caught at port " + str(i) + str(e))
                # Reconnect loop, same best-effort pattern as startup.
                ob = None
                while ob is None:
                    try:
                        client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
                        client.MAX_STEPS = np.inf
                        client.get_servers_input(0)  # Get the initial input from torcs
                        obs = client.S.d  # Get the current full-observation from torcs
                        ob = env.make_observation(obs)
                    except:
                        pass
                continue
            if done:  # NOTE(review): dead — `done` is never set in this loop
                break
        print(info)
        try:
            # NOTE(review): both branches relaunch; they differ only in the log line.
            if 'termination_cause' in info.keys() and info['termination_cause'] == 'hardReset':
                print('Hard reset by some agent')
                ob, client = env.reset(client=client, relaunch=True)
            else:
                ob, client = env.reset(client=client, relaunch=True)
        except Exception as e:
            print("Exception caught at point B at port " + str(i) + str(e))
            ob = None
            while ob is None:
                try:
                    client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
                    client.MAX_STEPS = np.inf
                    client.get_servers_input(0)  # Get the initial input from torcs
                    obs = client.S.d  # Get the current full-observation from torcs
                    ob = env.make_observation(obs)
                except:
                    print("Exception caught at at point C at port " + str(i) + str(e))
    env.end()  # This is for shutting down TORCS
    print("Finish.")
def playGame(f_diagnostics, train_indicator, port=3101):  # 1 means Train, 0 means simply Run
    """Train/run a DDPG agent on TORCS over UDP port `port`.

    BUG FIX: this block mixed Python 2 syntax (`print '...'` statements,
    `print info`, `xrange`) into an otherwise Python 3 file, which is a
    SyntaxError under Python 3.  Converted to Python 3; logic unchanged.

    Relies on module-level globals: TorcsEnv, snakeoil3, DDPG, total_explore,
    max_eps, max_steps_eps, epsilon_start, running_average, analyse_info.
    """
    action_dim = 3  # Steering/Acceleration/Brake
    state_dim = 29  # number of sensor inputs (no opponent sensors)
    env_name = 'Torcs_Env'
    agent = DDPG(env_name, state_dim, action_dim)

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    env = TorcsEnv(vision=False, throttle=True, gear_change=False)
    client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
    client.MAX_STEPS = np.inf
    client.get_servers_input(0)  # Get the initial input from torcs
    obs = client.S.d  # Get the current full-observation from torcs
    ob = env.make_observation(obs)
    s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                     ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm))

    EXPLORE = total_explore
    episode_count = max_eps
    max_steps = max_steps_eps
    epsilon = epsilon_start
    done = False
    epsilon_steady_state = 0.01  # This is used for early stopping.

    totalSteps = 0
    best_reward = -100000
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):
        save_indicator = 0
        # A randomized early-stopping scheme was explored here:
        # eps_early = max(epsilon, epsilon_steady_state)
        # early_stop = 1 if (random.random() < (1.0 - eps_early)
        #                    and train_indicator == 1) else 0
        early_stop = 1

        # Counting the total reward and total steps in the current episode
        total_reward = 0.
        info = {'termination_cause': 0}
        distance_traversed = 0.
        speed_array = []
        trackPos_array = []
        print('\n\nStarting new episode...\n')

        for step in range(max_steps):  # was xrange (Python 2)
            # Take noisy actions during training
            if (train_indicator):
                epsilon -= 1.0 / EXPLORE
                epsilon = max(epsilon, epsilon_steady_state)
                a_t = agent.noise_action(s_t, epsilon)
            else:
                a_t = agent.action(s_t)
            ob, r_t, done, info = env.step(step, client, a_t, early_stop)
            if done:
                break
            analyse_info(info, printing=False)
            s_t1 = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX,
                              ob.speedY, ob.speedZ, ob.wheelSpinVel / 100.0,
                              ob.rpm))
            distance_traversed += ob.speedX * np.cos(ob.angle)  # Assuming 1 step = 1 second
            speed_array.append(ob.speedX * np.cos(ob.angle))
            trackPos_array.append(ob.trackPos)
            # Checking for nan rewards: abort the episode loudly if one appears.
            if (math.isnan(r_t)):
                r_t = 0.0
                for bad_r in range(50):
                    print('Bad Reward Found')
                break  # Introduced by Anirban
            # Add to replay buffer only if training
            if (train_indicator):
                agent.perceive(s_t, a_t, r_t, s_t1, done)
            total_reward += r_t
            s_t = s_t1
            # Displaying progress every 15 steps.
            if ((np.mod(step, 15) == 0)):
                print("Episode", i, "Step", step, "Epsilon", epsilon,
                      "Action", a_t, "Reward", r_t)
            totalSteps += 1
            if done:
                break

        # Saving the best model.
        if ((save_indicator == 1) and (train_indicator == 1)):
            if (total_reward >= best_reward):
                print("Now we save model with reward " + str(total_reward) +
                      " previous best reward was " + str(best_reward))
                best_reward = total_reward
                agent.saveNetwork()
        running_avg_reward = running_average(running_avg_reward, i + 1, total_reward)
        print("TOTAL REWARD @ " + str(i) + "-th Episode : Num_Steps= " + str(step) +
              "; Max_steps= " + str(max_steps) + "; Reward= " + str(total_reward) +
              "; Running average reward= " + str(running_avg_reward))
        print("Total Step: " + str(totalSteps))
        print("")
        print(info)  # was `print info` (Python 2)
        # Only a hard reset skips the relaunch of the TORCS process.
        if 'termination_cause' in info.keys() and info['termination_cause'] == 'hardReset':
            print('Hard reset by some agent')
            ob, client = env.reset(client=client)
        else:
            ob, client = env.reset(client=client, relaunch=True)
        s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                         ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm))
        # document_episode(i, distance_traversed, speed_array, trackPos_array, info, running_avg_reward, f_diagnostics)

    env.end()  # This is for shutting down TORCS
    print("Finish.")
def playGame(f_diagnostics, train_indicator, port=3101):  # 1 means Train, 0 means simply Run
    """Fixed-policy baseline: drive straight at full throttle, never brake.

    Connects to TORCS on `port`, runs `max_eps` episodes of at most
    `max_steps_eps` steps with the hard-coded action [steer=0, accel=1,
    brake=0], and prints per-episode reward statistics.  No learning occurs,
    so no model is ever saved.

    Relies on module-level globals: TorcsEnv, snakeoil3, max_eps,
    max_steps_eps, epsilon_start, running_average, analyse_info.
    """
    action_dim = 3  # Steering/Acceleration/Brake
    state_dim = 29  # Number of sensors input
    env_name = 'Torcs_Env'

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    env = TorcsEnv(vision=False, throttle=True, gear_change=False)
    client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
    client.MAX_STEPS = np.inf
    client.get_servers_input(0)  # Get the initial input from torcs
    obs = client.S.d  # Get the current full-observation from torcs
    ob = env.make_observation(obs)

    episode_count = max_eps
    max_steps = max_steps_eps
    epsilon = epsilon_start
    done = False

    totalSteps = 0
    best_reward = -100000
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):
        early_stop = 1
        total_reward = 0.
        info = {'termination_cause': 0}
        distance_traversed = 0.
        speed_array = []
        trackPos_array = []
        print('\n\nStarting new episode...\n')

        for step in range(max_steps):
            # Hard-coded steer=0, accel=1 and brake=0; replace a_t as per any
            # other algorithm.
            a_t = np.asarray([0.0, 1.0, 0.0])  # [steer, accel, brake]
            ob, r_t, done, info = env.step(step, client, a_t, early_stop)
            if done:
                break
            analyse_info(info, printing=False)
            s_t1 = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX,
                              ob.speedY, ob.speedZ, ob.wheelSpinVel / 100.0,
                              ob.rpm))
            distance_traversed += ob.speedX * np.cos(ob.angle)  # Assuming 1 step = 1 second
            speed_array.append(ob.speedX * np.cos(ob.angle))
            trackPos_array.append(ob.trackPos)
            # Checking for nan rewards: abort the episode loudly if one appears.
            if (math.isnan(r_t)):
                r_t = 0.0
                for bad_r in range(50):
                    print("Bad Reward Found")
                break  # Introduced by Anirban
            total_reward += r_t
            s_t = s_t1
            # Displaying progress every 15 steps.
            if ((np.mod(step, 15) == 0)):
                print("Episode", i, "Step", step, "Epsilon", epsilon,
                      "Action", a_t, "Reward", r_t)
            totalSteps += 1
            if done:
                break

        # BUG FIX: the original gated `agent.saveNetwork()` on save_indicator
        # (hard-coded 0) and train_indicator, but no `agent` exists in this
        # baseline -- the branch would raise NameError if ever enabled.
        # Removed as dead/broken code.
        running_avg_reward = running_average(running_avg_reward, i + 1, total_reward)
        print("TOTAL REWARD @ " + str(i) + "-th Episode : Num_Steps= " + str(step) +
              "; Max_steps= " + str(max_steps) + "; Reward= " + str(total_reward) +
              "; Running average reward= " + str(running_avg_reward))
        print("Total Step: " + str(totalSteps))
        print("")
        print(info)
        # Only a hard reset skips the relaunch of the TORCS process.
        if 'termination_cause' in info.keys() and info['termination_cause'] == 'hardReset':
            print('\n\n***Hard reset by some agent***\n\n')
            ob, client = env.reset(client=client)
        else:
            ob, client = env.reset(client=client, relaunch=True)
        s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                         ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm))
        # Uncomment to log per-episode statistics (distance traversed, average
        # speed, distance from track centre, etc.):
        # document_episode(i, distance_traversed, speed_array, trackPos_array, info, running_avg_reward, f_diagnostics)

    env.end()  # Shut down TORCS
    print("Finish.")