Example #1
def playGame(f_diagnostics,
             train_indicator,
             agent,
             port=3101):  # 1 means Train, 0 means simply Run

    action_dim = 3  #Steering/Acceleration/Brake
    state_dim = 65  # Number of sensor inputs
    env_name = 'Torcs_Env'
    save_location = "./weights/"

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    env = TorcsEnv(vision=False, throttle=True, gear_change=False, main=1)
    ob = None
    while ob is None:
        try:
            client = snakeoil3.Client(p=port,
                                      vision=False)  # Open new UDP in vtorcs
            client.MAX_STEPS = np.inf
            client.get_servers_input(0)  # Get the initial input from torcs

            obs = client.S.d  # Get the current full-observation from torcs
            ob = env.make_observation(obs)

            s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX,
                             ob.speedY, ob.speedZ, ob.wheelSpinVel / 100.0,
                             ob.rpm, ob.opponents))
        except Exception:
            pass  # TORCS not ready yet; keep retrying

    EXPLORE = total_explore
    episode_count = max_eps
    max_steps = max_steps_eps
    epsilon = epsilon_start
    done = False
    epsilon_steady_state = 0.01  # This is used for early stopping.

    totalSteps = 0
    best_reward = -100000
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):

        save_indicator = 0
        early_stop = 1
        total_reward = 0.
        info = {'termination_cause': 0}
        distance_traversed = 0.
        speed_array = []
        trackPos_array = []

        print('\n\nStarting new episode...\n')
        print("Initial memory consumption: ")

        for step in range(max_steps):

            # Take noisy actions during training
            if (train_indicator == 1):
                epsilon -= 1.0 / EXPLORE
                epsilon = max(epsilon, epsilon_steady_state)
                a_t = agent.noise_action(s_t, epsilon)

            else:
                a_t = agent.action(s_t)

            try:
                ob, r_t, done, info = env.step(step, client, a_t, early_stop)
                if done:
                    break

                analyse_info(info, printing=False)

                s_t1 = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, \
                 ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm, ob.opponents))
                distance_traversed += ob.speedX * np.cos(
                    ob.angle)  #Assuming 1 step = 1 second

                if (math.isnan(r_t)):
                    r_t = 0.0
                    for bad_r in range(50):
                        print('Bad Reward Found')
                    break  #Introduced by Anirban

                # Add to replay buffer only if training
                if (train_indicator):
                    agent.perceive(s_t, a_t, r_t, s_t1, done)  # Add experience to replay buffer

            except Exception as e:
                print("Exception caught at port " + str(port) + ": " + str(e))
                ob = None
                while ob is None:
                    try:
                        client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
                        client.MAX_STEPS = np.inf
                        client.get_servers_input(0)  # Get the initial input from torcs
                        obs = client.S.d  # Get the current full-observation from torcs
                        ob = env.make_observation(obs)
                    except Exception:
                        pass  # TORCS not ready yet; keep retrying
            total_reward += r_t
            s_t = s_t1

            # Displaying progress every 15 steps.
            if ((np.mod(step, 15) == 0)):
                print("Episode", i, "Step", step, "Epsilon", epsilon, "Action",
                      a_t, "Reward", r_t)

            totalSteps += 1
            if done:
                break

        # Update the running average of episode rewards.
        running_avg_reward = running_average(running_avg_reward, i + 1,
                                             total_reward)

        if train_indicator == 1:

            #Save network after every 20 episodes and store the data
            if np.mod(i, 20) == 0:
                agent.saveNetwork(i)

        # Save the client's training data for analysis every 5 episodes
        if train_indicator == 1 and np.mod(i, 5) == 0:
            f1 = open(str(port) + ".csv", "a+")
            client.printAnalysis(f1, i)
            f1.close()


        print("TOTAL REWARD @ " + str(i) +"-th Episode  : Num_Steps= " + str(step) + "; Max_steps= " \
         + str(max_steps)  +"; Reward= " + str(total_reward) + \
          "; Running average reward= " + str(running_avg_reward))
        print("Total Step: " + str(totalSteps))
        print("")

        print(info)
        try:
            if 'termination_cause' in info.keys(
            ) and info['termination_cause'] == 'hardReset':
                print('Hard reset by some agent')
                ob, client = env.reset(client=client, relaunch=True)
            else:
                ob, client = env.reset(client=client, relaunch=True)
        except Exception as e:
            print("Exception caught at point B at port " + str(port) + ": " + str(e))
            ob = None
            while ob is None:
                try:
                    client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
                    client.MAX_STEPS = np.inf
                    client.get_servers_input(0)  # Get the initial input from torcs
                    obs = client.S.d  # Get the current full-observation from torcs
                    ob = env.make_observation(obs)
                except Exception as e:
                    print("Exception caught at point C at port " + str(port) + ": " + str(e))


        s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX,
                         ob.speedY, ob.speedZ, ob.wheelSpinVel / 100.0,
                         ob.rpm, ob.opponents))

    env.end()  # This is for shutting down TORCS
    print("Finish.")
Example #2
class World(object):
    def __init__(self):
        self.n = 4
        #self.n2 = 0
        self.env = TorcsEnv(vision=False, throttle=True, gear_change=False)
        self.observation_space = self.env.observation_space  # basically this is one agent's observation space
        self.action_space = self.env.action_space
        self.step_count = 0
        self.agent_list = []
        #self.agent2_list = []
        self.initialize_agents()

    def initialize_agents(self):
        self.agent_list = []
        #self.agent2_list = []
        for i in range(self.n):
            agent = Agent(idx=i)
            #agent.s_t = self.get_initial_observation(agent,0) #can remove step from here, can even remove the function
            self.agent_list.append(agent)
        ''' for i in range(self.n2):			#uncomment this for competitive agents
              agent = Agent(idx = i + self.n) 
              #agent.s_t = self.get_initial_observation(agent,0)
              self.agent2_list.append(agent)'''

        for i in range(self.n):
            self.agent_list[i].s_t = self.get_initial_observation(
                self.agent_list[i],
                0)  #can remove step from here, can even remove the function
        '''for i in range(self.n2):                   #uncomment this for competitive agents
              self.agent2_list[i].s_t = self.get_initial_observation(self.agent2_list[i],0)'''

    def reset_agents(self):
        for i in range(self.n):
            self.agent_list[i].client.R.d['meta'] = True

    def get_initial_observation(self, agent, step_count=0):
        agent.client.get_servers_input(step_count)
        obs = agent.client.S.d
        ob = self.env.make_observation(obs)
        agent.s_t = np.hstack(
            (ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ,
             ob.wheelSpinVel / 100.0, ob.rpm, ob.opponents))
        return agent.s_t

    def update_agent_state(self,
                           agent):  #this should be a function in agent class
        print("agent port " + str(agent.port) + "  " + "action is " +
              str(agent.action))
        ob, r_t, done, info = self.env.step(self.step_count,
                                            agent.client,
                                            agent.action,
                                            early_stop=0)
        agent.done = done
        agent.r_t = r_t
        agent.s_t = np.hstack(
            (ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ,
             ob.wheelSpinVel / 100.0, ob.rpm, ob.opponents))

    def reset_world(self):
        self.reset_agents()
        self.env.reset_torcs()
        self.initialize_agents()  # check if self required here

    def stepWorld(self):
        for agent in self.agent_list:
            self.update_agent_state(agent)

    def reward(self, agent):
        return agent.r_t

    def observation(self, agent):
        return agent.s_t

    def done(self, agent):
        return agent.done
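
The World wrapper above depends on an Agent class that is not shown. A minimal sketch of the interface it assumes (an index, a dedicated snakeoil3 client and port, and per-step action/state/reward/done fields) is given below; the base port and the default action are illustrative assumptions, not values from the original code.

class Agent(object):
    # Minimal per-agent container assumed by World (sketch; the real class is not shown).
    BASE_PORT = 3101  # assumed: each agent talks to its own TORCS server port

    def __init__(self, idx):
        self.idx = idx
        self.port = Agent.BASE_PORT + idx
        self.client = snakeoil3.Client(p=self.port, vision=False)  # one UDP client per agent
        self.client.MAX_STEPS = np.inf
        self.action = np.asarray([0.0, 0.0, 0.0])  # [steer, accel, brake], set by the policy each step
        self.s_t = None    # latest state vector, filled by World.get_initial_observation
        self.r_t = 0.0     # latest reward, filled by World.update_agent_state
        self.done = False  # per-agent episode-termination flag
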
Example #3
def playGame(f_diagnostics,
             train_indicator,
             agent,
             port=3101):  # 1 means Train, 0 means simply Run

    action_dim = 3  #Steering/Acceleration/Brake
    state_dim = 65  # Number of sensor inputs
    env_name = 'Torcs_Env'
    save_location = "./weights/"

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    env = TorcsEnv(vision=False, throttle=True, gear_change=False, main=1)
    ob = None
    while ob is None:
        try:
            client = snakeoil3.Client(p=port,
                                      vision=False)  # Open new UDP in vtorcs
            client.MAX_STEPS = np.inf

            client.get_servers_input(0)  # Get the initial input from torcs
            obs = client.S.d  # Get the current full-observation from torcs
            ob = env.make_observation(obs)

        except Exception:
            pass  # TORCS not ready yet; keep retrying

    EXPLORE = total_explore
    episode_count = max_eps
    max_steps = max_steps_eps
    epsilon = epsilon_start
    done = False
    epsilon_steady_state = 0.01  # This is used for early stopping.

    totalSteps = 0
    best_reward = -100000
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):

        save_indicator = 0
        early_stop = 1
        total_reward = 0.
        info = {'termination_cause': 0}
        distance_traversed = 0.
        speed_array = []
        trackPos_array = []

        print('\n\nStarting new episode...\n')
        print("Initial memory consumption: ")
        for step in range(max_steps):

            # Step snakeoil's built-in example driver (this example does not use a learned policy)
            try:
                client.get_servers_input(step)
                snakeoil3.drive_example(client)
                client.respond_to_server()

            except Exception as e:
                print("Exception caught at port " + str(port) + ": " + str(e))
                ob = None
                while ob is None:
                    try:
                        client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
                        client.MAX_STEPS = np.inf
                        client.get_servers_input(0)  # Get the initial input from torcs
                        obs = client.S.d  # Get the current full-observation from torcs
                        ob = env.make_observation(obs)
                    except Exception:
                        pass  # TORCS not ready yet; keep retrying

            if done:
                break

        print(info)
        try:
            if 'termination_cause' in info.keys(
            ) and info['termination_cause'] == 'hardReset':
                print('Hard reset by some agent')
                ob, client = env.reset(client=client, relaunch=True)
            else:
                ob, client = env.reset(client=client, relaunch=True)
        except Exception as e:
            print("Exception caught at point B at port " + str(port) + ": " + str(e))
            ob = None
            while ob is None:
                try:
                    client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
                    client.MAX_STEPS = np.inf
                    client.get_servers_input(0)  # Get the initial input from torcs
                    obs = client.S.d  # Get the current full-observation from torcs
                    ob = env.make_observation(obs)
                except Exception as e:
                    print("Exception caught at point C at port " + str(port) + ": " + str(e))

    env.end()  # This is for shutting down TORCS
    print("Finish.")
Example #4
def playGame(f_diagnostics, train_indicator, port=3101):    # 1 means Train, 0 means simply Run
	
	action_dim = 3  #Steering/Acceleration/Brake
	state_dim = 29  #of sensors input
	env_name = 'Torcs_Env'
	agent = DDPG(env_name, state_dim, action_dim)

	# Generate a Torcs environment
	print("I have been asked to use port: ", port)
	env = TorcsEnv(vision=False, throttle=True, gear_change=False) 
	
	client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
	client.MAX_STEPS = np.inf

	client.get_servers_input(0)  # Get the initial input from torcs

	obs = client.S.d  # Get the current full-observation from torcs
	ob = env.make_observation(obs)

	s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,  ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))

	EXPLORE = total_explore
	episode_count = max_eps
	max_steps = max_steps_eps
	epsilon = epsilon_start
	done = False
	epsilon_steady_state = 0.01 # This is used for early stopping.
 
	totalSteps = 0
	best_reward = -100000
	running_avg_reward = 0.

	print("TORCS Experiment Start.")
	for i in range(episode_count):

		save_indicator = 0
			
		# env.reset(client=client, relaunch=True)	
		# random_number = random.random()
		# eps_early = max(epsilon,epsilon_steady_state) #At least 0.01 
		# if (random_number < (1.0-eps_early)) and (train_indicator == 1): #During training, at most 99% of the time, early stopping would be engaged 
		#     early_stop = 1
		# else: 
		#     early_stop = 0
		early_stop = 1
		# print("Episode : " + str(i) + " Replay Buffer " + str(agent.replay_buffer.count()) + ' Early Stopping: ' + str(early_stop) +  ' Epsilon: ' + str(eps_early) +  ' RN: ' + str(random_number)  )

		#Initializing the first state
		# s_t = np.hstack((ob['angle'], ob['track'], ob['trackPos'], ob['speedX'], ob['speedY'],  ob['speedZ'], ob['wheelSpinVel']/100.0, ob['rpm']))
		# s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,  ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))
		# Counting the total reward and total steps in the current episode
		total_reward = 0.
		info = {'termination_cause':0}
		distance_traversed = 0.
		speed_array=[]
		trackPos_array=[]
		
		print('\n\nStarting new episode...\n')

		for step in range(max_steps):

			# Take noisy actions during training
			if (train_indicator):
			    epsilon -= 1.0 / EXPLORE
			    epsilon = max(epsilon, epsilon_steady_state) 
			    a_t = agent.noise_action(s_t,epsilon) #Take noisy actions during training
			else:
			    a_t = agent.action(s_t)
			# a_t = np.asarray([0.0, 1.0, 0.0])		# [steer, accel, brake]

			ob, r_t, done, info = env.step(step, client, a_t, early_stop)
			if done:
				break
			# print done
			# print 'Action taken'
			analyse_info(info, printing=False)

			s_t1 = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,  ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))
			distance_traversed += ob.speedX*np.cos(ob.angle) #Assuming 1 step = 1 second
			speed_array.append(ob.speedX*np.cos(ob.angle))
			trackPos_array.append(ob.trackPos)


			#Checking for nan rewards: TODO: This was actually below the following block
			if (math.isnan( r_t )):
				r_t = 0.0
				for bad_r in range( 50 ):
					print( 'Bad Reward Found' )
				break #Introduced by Anirban


			# Add to replay buffer only if training
			if (train_indicator):
				agent.perceive(s_t,a_t,r_t,s_t1,done) # Add experience to replay buffer


			total_reward += r_t
			s_t = s_t1

			# Displaying progress every 15 steps.
			if ( (np.mod(step,15)==0) ):        
			    print("Episode", i, "Step", step, "Epsilon", epsilon , "Action", a_t, "Reward", r_t )

			totalSteps += 1
			if done:
				break

		# Saving the best model.
		if ((save_indicator==1) and (train_indicator ==1 )):
			if (total_reward >= best_reward):
				print("Now we save model with reward " + str(total_reward) + " previous best reward was " + str(best_reward))
				best_reward = total_reward
				agent.saveNetwork()     
	
		running_avg_reward = running_average(running_avg_reward, i+1, total_reward)  


		print("TOTAL REWARD @ " + str(i) +"-th Episode  : Num_Steps= " + str(step) + "; Max_steps= " + str(max_steps) +"; Reward= " + str(total_reward) +"; Running average reward= " + str(running_avg_reward))
		print("Total Step: " + str(totalSteps))
		print("")

		print(info)
		if 'termination_cause' in info.keys() and info['termination_cause']=='hardReset':
			print('Hard reset by some agent')
			ob, client = env.reset(client=client) 
		else:
			ob, client = env.reset(client=client, relaunch=True) 
		s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,  ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))

		# document_episode(i, distance_traversed, speed_array, trackPos_array, info, running_avg_reward, f_diagnostics)

	env.end()  # This is for shutting down TORCS
	print("Finish.")
Example #5
def playGame(f_diagnostics,
             train_indicator,
             port=3101):  #1 means Train, 0 means simply Run

    action_dim = 3  #Steering/Acceleration/Brake
    state_dim = 29  # Number of sensor inputs
    env_name = 'Torcs_Env'

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    env = TorcsEnv(vision=False, throttle=True, gear_change=False)

    client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
    client.MAX_STEPS = np.inf

    client.get_servers_input(0)  # Get the initial input from torcs

    obs = client.S.d  # Get the current full-observation from torcs
    ob = env.make_observation(obs)

    # EXPLORE = total_explore
    episode_count = max_eps
    max_steps = max_steps_eps
    epsilon = epsilon_start
    done = False
    # epsilon_steady_state = 0.01 # This is used for early stopping.

    totalSteps = 0
    best_reward = -100000
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):

        save_indicator = 0  # 1 to save the learned weights, 0 otherwise
        early_stop = 1
        total_reward = 0.
        info = {'termination_cause': 0}
        distance_traversed = 0.
        speed_array = []
        trackPos_array = []

        print('\n\nStarting new episode...\n')

        for step in range(max_steps):
            # Hard-coded action: steer=0, accel=1, brake=0; set a_t from any other algorithm instead
            a_t = np.asarray([0.0, 1.0, 0.0])  # [steer, accel, brake]

            ob, r_t, done, info = env.step(step, client, a_t, early_stop)
            if done:
                break
            analyse_info(info, printing=False)

            s_t1 = np.hstack(
                (ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                 ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm))
            distance_traversed += ob.speedX * np.cos(
                ob.angle)  #Assuming 1 step = 1 second
            speed_array.append(ob.speedX * np.cos(ob.angle))
            trackPos_array.append(ob.trackPos)

            #Checking for nan rewards: TODO: This was actually below the following block
            if (math.isnan(r_t)):
                r_t = 0.0
                for bad_r in range(50):
                    print("Bad Reward Found")
                break  #Introduced by Anirban

            total_reward += r_t
            s_t = s_t1

            # Displaying progress every 15 steps.
            if ((np.mod(step, 15) == 0)):
                print("Episode", i, "Step", step, "Epsilon", epsilon, "Action",
                      a_t, "Reward", r_t)

            totalSteps += 1
            if done:
                break

        # Saving the best model.
        if ((save_indicator == 1) and (train_indicator == 1)):
            if (total_reward >= best_reward):
                print("Now we save model with reward " + str(total_reward) +
                      " previous best reward was " + str(best_reward))
                best_reward = total_reward
                agent.saveNetwork()

        running_avg_reward = running_average(running_avg_reward, i + 1,
                                             total_reward)

        print("TOTAL REWARD @ " + str(i) + "-th Episode  : Num_Steps= " +
              str(step) + "; Max_steps= " + str(max_steps) + "; Reward= " +
              str(total_reward) + "; Running average reward= " +
              str(running_avg_reward))
        print("Total Step: " + str(totalSteps))
        print("")

        print(info)
        if 'termination_cause' in info.keys(
        ) and info['termination_cause'] == 'hardReset':
            print('\n\n***Hard reset by some agent***\n\n')
            ob, client = env.reset(client=client)
        else:
            ob, client = env.reset(client=client, relaunch=True)

        s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                         ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm))

        # Uncomment to log per-episode statistics (total distance traversed, average speed, distance from the center of the track, etc.)
        # document_episode(i, distance_traversed, speed_array, trackPos_array, info, running_avg_reward, f_diagnostics)

    env.end()  # Shut down TORCS
    print("Finish.")