Example #1
    def reset(self, relaunch=False):

        self.time_step = 0

        if not self.initial_reset:
            self.client.R.d['meta'] = True
            self.client.respond_to_server()

            if relaunch is True:

                self._set_track()

                self.container.exec_run("kill_torcs.sh", detach=True)
                self.container.exec_run("start_torcs.sh", detach=True)

        self.client = snakeoil3.Client(p=self.port)

        self.client.MAX_STEPS = np.inf

        self.client.get_servers_input()
        obs = self.client.S.d
        self.observation = self._make_observaton(obs)

        self.last_u = None

        self.initial_reset = False

        return self.get_obs()
Example #2
    def reset(self, relaunch=False):
        #print("Reset")

        self.time_step = 0

        if self.initial_reset is not True:
            self.client.R.d['meta'] = True
            self.client.respond_to_server()

            ## TENTATIVE. Restarting TORCS every episode suffers the memory leak bug!
            if relaunch is True:
                self.reset_torcs()
                print("### TORCS is RELAUNCHED ###")

        # Modify here if you use multiple tracks in the environment
        self.client = snakeoil3.Client(p=3008, vision=self.vision)  # Open new UDP in vtorcs
        self.client.MAX_STEPS = np.inf

        client = self.client
        client.get_servers_input()  # Get the initial input from torcs

        obs = client.S.d  # Get the current full-observation from torcs
        self.observation = self.make_observation(obs)

        self.last_u = None

        self.initial_reset = False
        return self.get_obs()
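
Nearly every example on this page repeats the same bootstrap: open a snakeoil3 UDP client, lift the step cap, read one packet from the server, and convert the raw sensor dict into an observation. A minimal sketch of that shared handshake, assuming a `snakeoil3.Client` and an env exposing `make_observation` as above (the `bootstrap_client` helper itself is hypothetical):

import numpy as np
import snakeoil3

def bootstrap_client(env, port=3001):
    # Hypothetical helper consolidating the handshake used throughout this page.
    client = snakeoil3.Client(p=port)  # open a new UDP connection to vtorcs
    client.MAX_STEPS = np.inf          # never let the client stop itself
    client.get_servers_input()         # blocking read of the first sensor packet
    obs = client.S.d                   # raw sensor dict from the server
    return client, env.make_observation(obs)
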
Example #3
    def reset(self, client, relaunch=False):

        port = client.port
        self.time_step = 0
        if self.initial_reset is not True:
            client.R.d['meta'] = True
            client.respond_to_server()

            ## TENTATIVE. Restarting TORCS every episode suffers the memory leak bug!
            if relaunch is True:
                self.reset_torcs()
                print("### TORCS is RELAUNCHED ###")

        # Modify here if you use multiple tracks in the environment
        client = snakeoil3.Client(p=port,
                                  vision=self.vision)  # Open new UDP in vtorcs
        client.MAX_STEPS = np.inf
        # client = self.client
        client.get_servers_input(0)  # Get the initial input from torcs

        obs = client.S.d  # Get the current full-observation from torcs
        self.observation = self.make_observation(obs)
        self.currState = np.hstack(
            (self.observation.angle, self.observation.track,
             self.observation.trackPos, self.observation.speedX,
             self.observation.speedY, self.observation.speedZ,
             self.observation.wheelSpinVel / 100.0, self.observation.rpm))

        self.last_u = None
        self.initial_reset = False
        return self.get_obs(), client
Example #4
    def __init__(self,
                 id,
                 port,
                 vision=False,
                 throttle=False,
                 gear_change=False):
        # print("Init")
        self.vision = vision
        self.id = id  # Provides a reference to the car.
        self.port = port  # Port on which the agent will connect to the Torcs server.
        self.throttle = throttle
        self.gear_change = gear_change

        self.initial_run = True

        ##print("launch torcs")
        # os.system('pkill torcs')
        time.sleep(0.5)
        if self.vision is True:
            os.system('torcs -nofuel -nodamage -nolaptime  -vision &')
        else:
            os.system('torcs  -nofuel -nodamage -nolaptime &')
        time.sleep(0.5)
        os.system('sh autostart.sh')
        time.sleep(0.5)

        # Modify here if you use multiple tracks in the environment
        self.client = snakeoil3.Client(
            p=self.port, vision=self.vision)  # Open new UDP in vtorcs

        self.client.MAX_STEPS = np.inf

        client = self.client

        # Client gets stuck here. Server does not respond because it is
        # waiting for other clients to connect.
        # client.get_servers_input()  # Get the initial input from torcs

        obs = client.ServerState.data  # Get the current full-observation from torcs
        self.obs = obs
        if throttle is False:
            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1, ))
        else:
            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2, ))

        if vision is False:
            high = np.array(
                [1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf])
            low = np.array(
                [0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf])
            self.observation_space = spaces.Box(low=low, high=high)
        else:
            high = np.array(
                [1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255])
            low = np.array(
                [0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0])
            self.observation_space = spaces.Box(low=low, high=high)
Example #5
def main():

    assert len(
        sys.argv
    ) > 1, 'python model.py render/norender path_to_model.json [seed]'

    render_mode_string = str(sys.argv[1])
    if (render_mode_string == "render"):
        render_mode = True
    else:
        render_mode = False

    use_model = False
    if len(sys.argv) > 2:
        use_model = True
        filename = sys.argv[2]
        print("filename", filename)

    the_seed = np.random.randint(10000)
    if len(sys.argv) > 3:
        the_seed = int(sys.argv[3])
        print("seed", the_seed)

    if (use_model):
        model = make_model()
        print('model size', model.param_count)
        client = snakeoil3.Client(p=3001,
                                  vision=False)  # Open new UDP in vtorcs
        client.MAX_STEPS = np.inf
        client.get_servers_input(0)
        model.make_env(client)
        model.load_model(filename)
    else:
        model = make_model(load_model=False)
        print('model size', model.param_count)
        model.make_env(render_mode=render_mode)
        model.init_random_model_params(stdev=np.random.rand() * 0.01)

    N_episode = 100
    if render_mode:
        N_episode = 1
    reward_list = []
    for i in range(N_episode):
        reward, steps_taken = simulate(model,
                                       train_mode=False,
                                       render_mode=render_mode,
                                       num_episode=1)
        if render_mode:
            print("terminal reward", reward, "average steps taken",
                  np.mean(steps_taken) + 1)
        else:
            print(reward[0])
        reward_list.append(reward[0])
    if not render_mode:
        print("seed", the_seed, "average_reward", np.mean(reward_list),
              "stdev", np.std(reward_list))
Example #6
 def __init__(self, idx=0):
     self.idx = idx
     self.port = 3101 + self.idx
     self.action_dim = 3
     self.state_dim = 65
     self.obs = []
     self.client = snakeoil3.Client(p=self.port, vision=False)
     self.s_t = []
     self.r_t = 0
     self.done = 0
     self.action = [0, 1, 0]
     self.client.MAX_STEPS = np.inf
Example #7
    def reset(self, relaunch=False, sampletrack=False, render=False):
        """ Reset the environment
            Arguments:
                - relaunch: Relaunch the game. Necessary to call with
                    from time to time because of the memory leak
                sampletrack: Sample a random track and load the game
                    with it at the relaunch. Relaunch needs to be 
                    true in order to modify the track!
                render: Change the mode. If true, game will be launch
                    in "render" mode else with "results only" mode.
                    Relaunch needs to be true in order to modify the track!
        """
        self.time_step = 0

        if relaunch:
            if sampletrack:
                try:
                    sample_track(self.root)
                except AttributeError:
                    pass
            try:
                set_render_mode(self.root, render=render)
            except AttributeError:
                pass
            self.tree.write(self.path)
            time.sleep(0.5)

        if self.initial_reset is not True:
            self.client.R.d['meta'] = True
            self.client.respond_to_server()

            ## TENTATIVE. Restarting TORCS every episode suffers the memory leak bug!
            if relaunch is True:
                self.reset_torcs()
                # print("### TORCS is RELAUNCHED ###")

        # Modify here if you use multiple tracks in the environment
        self.client = snakeoil3.Client(p=self.port,
                                       vision=False)  # Open new UDP in vtorcs
        self.client.MAX_STEPS = np.inf

        client = self.client
        client.get_servers_input()  # Get the initial input from torcs

        obs = client.S.d  # Get the current full-observation from torcs
        self.observation = self.make_observaton(obs)

        self.place = int(obs["racePos"])

        self.initial_reset = False
        return self.get_obs()
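
To make the flag interplay documented in this reset concrete, a hypothetical call sequence (the `env` instance and its construction are assumed):

# Sketch of how the reset flags above combine; `env` is an assumed instance
# of the class this method belongs to.
obs = env.reset()                                 # plain episode reset
obs = env.reset(relaunch=True)                    # restart TORCS (memory-leak workaround)
obs = env.reset(relaunch=True, sampletrack=True)  # random track; needs relaunch=True
obs = env.reset(relaunch=True, render=True)       # "render" mode; needs relaunch=True
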
Example #8
def slave():
    packet = np.empty(SOLUTION_PACKET_SIZE, dtype=np.int32)
    comm.Recv(packet, source=0)
    assert (len(packet) == SOLUTION_PACKET_SIZE)
    solutions = decode_solution_packet(packet)
    worker_id, jobidx, seed, train_mode, max_len, weights = solutions[0]
    port = 3001 + worker_id
    print(port)
    client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
    client.MAX_STEPS = np.inf
    client.get_servers_input(0)
    model.make_env(client)
    packet = np.empty(SOLUTION_PACKET_SIZE, dtype=np.int32)

    results = []
    for solution in solutions:
        worker_id, jobidx, seed, train_mode, max_len, weights = solution
        assert (train_mode == 1 or train_mode == 0), str(train_mode)
        worker_id = int(worker_id)
        possible_error = "work_id = " + str(worker_id) + " rank = " + str(rank)
        assert worker_id == rank, possible_error
        jobidx = int(jobidx)
        seed = int(seed)
        fitness, timesteps = worker(weights, seed, train_mode, max_len)
        results.append([worker_id, jobidx, fitness, timesteps])
    result_packet = encode_result_packet(results)
    assert len(result_packet) == RESULT_PACKET_SIZE
    comm.Send(result_packet, dest=0)
    while 1:
        comm.Recv(packet, source=0)
        assert (len(packet) == SOLUTION_PACKET_SIZE)
        solutions = decode_solution_packet(packet)
        results = []
        for solution in solutions:
            worker_id, jobidx, seed, train_mode, max_len, weights = solution
            assert (train_mode == 1 or train_mode == 0), str(train_mode)
            worker_id = int(worker_id)
            possible_error = "work_id = " + str(worker_id) + " rank = " + str(
                rank)
            assert worker_id == rank, possible_error
            jobidx = int(jobidx)
            seed = int(seed)
            fitness, timesteps = worker(weights, seed, train_mode, max_len)
            results.append([worker_id, jobidx, fitness, timesteps])
        result_packet = encode_result_packet(results)
        assert len(result_packet) == RESULT_PACKET_SIZE
        comm.Send(result_packet, dest=0)
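
This slave is the worker half of a fixed-size packet protocol: block on `comm.Recv`, evaluate each decoded solution, send back an encoded result packet. A stripped-down sketch of that loop with mpi4py, where the packet codecs and the `evaluate` function stand in for the ones the example assumes:

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

def slave_loop(packet_size, decode, encode, evaluate):
    # Generic request/response worker: Recv packet -> evaluate -> Send results.
    packet = np.empty(packet_size, dtype=np.int32)  # reusable receive buffer
    while True:
        comm.Recv(packet, source=0)                 # block until master sends work
        results = []
        for worker_id, jobidx, seed, train_mode, max_len, weights in decode(packet):
            fitness, timesteps = evaluate(weights, int(seed), train_mode, max_len)
            results.append([int(worker_id), int(jobidx), fitness, timesteps])
        comm.Send(encode(results), dest=0)          # fixed-size reply to master
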
Example #9
    def __init__(self, vision=False, throttle=False, gear_change=False):
        #print("Init")
        self.vision = vision
        self.throttle = throttle
        self.gear_change = gear_change

        self.initial_run = True

        ##print("launch torcs")
        #os.system('pkill torcs')
        time.sleep(0.5)
        if self.vision is True:
            os.system('torcs -nofuel -nodamage -nolaptime  -vision &')
        else:
            os.system('torcs  -nofuel -nodamage -nolaptime &')
        time.sleep(0.5)
        os.system('sh autostart.sh')
        time.sleep(0.5)

        # Modify here if you use multiple tracks in the environment
        self.client = snakeoil3.Client(
            p=3101, vision=self.vision)  # Open new UDP in vtorcs

        self.client.MAX_STEPS = np.inf

        client = self.client
        client.get_servers_input()  # Get the initial input from torcs

        obs = client.ServerState.data  # Get the current full-observation from torcs
        self.obs = obs
        if throttle is False:
            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1, ))
        else:
            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2, ))

        if vision is False:
            high = np.array(
                [1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf])
            low = np.array(
                [0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf])
            self.observation_space = spaces.Box(low=low, high=high)
        else:
            high = np.array(
                [1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255])
            low = np.array(
                [0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0])
            self.observation_space = spaces.Box(low=low, high=high)
Example #10
def main(_):
    """
    main function
    :param _: unused
    :return: None
    """
    #init
    path = root_dir + '/' + save_dir
    print("save in {}, target_speed {}".format(path, FLAGS.target_speed))
    if os.path.exists(path):
        print('directory already exists, run again with a new save_dir')
        sys.exit()
    else:
        os.mkdir(path)

    os.chdir(path)
    pygame.joystick.init()
    joystick = pygame.joystick.Joystick(0)
    joystick.init()
    pygame.display.init()

    #variable
    C = snakeoil3_gym.Client(p=3001)  # 3001
    driver = driveClient(FLAGS.target_speed,C,joystick)
    start = time.time()
    save = myThread(joystick,path)

    #save image
    save.start()

    #control fragment
    for step in range(C.maxSteps, 0, -1):
        C.get_servers_input()
        driver.drive()
        C.respond_to_server()
    C.shutdown()

    print(time.time() - start)
    with open(path + '/' + 'data.txt', 'w') as f:
        for l in labels:
            f.write(str(l) + '\n')

    save.stop(True)

    sys.exit()
Example #11
    def reset(self, relaunch=False):
        #print("Reset")
        self.time_step = 0

        if self.initial_reset is not True:
            self.client.R.d['meta'] = True
            self.client.respond_to_server()

            ## TENTATIVE. Restarting TORCS every episode suffers the memory leak bug!
            if relaunch is True:
                self.reset_torcs()
                print("### TORCS is RELAUNCHED ###")

        # Modify here if you use multiple tracks in the environment
        ### dosssman: Pass existing process id and race config path
        self.client = snakeoil3.Client(
            p=3101,
            vision=self.vision,
            process_id=self.torcs_process_id,
            race_config_path=self.race_config_path,
            race_speed=self.race_speed,
            rendering=self.rendering,
            lap_limiter=self.lap_limiter,
            damage=self.damage,
        )  #Open new UDP in vtorcs

        self.client.MAX_STEPS = np.inf

        client = self.client

        client.get_servers_input()  # Get the initial input from torcs

        obs = client.S.d  # Get the current full-observation from torcs
        self.observation = self.make_observaton(obs)

        self.last_u = None

        self.initial_reset = False

        # The newly created TORCS PID is also reattached to the Gym-TORCS env.
        # This should be temporary ... but only time knows.
        self.torcs_process_id = self.client.torcs_process_id

        return self.get_obs()
Example #12
		R['gear']=2
	if S['speedX']>80:
		R['gear']=3
	if S['speedX']>110:
		R['gear']=4
	if S['speedX']>140:
		R['gear']=5
	if S['speedX']>170:
		R['gear']=6
	return




if __name__ == "__main__":
    C= snakeoil3_gym.Client(p=3101)
    for step in range(C.maxSteps,0,-1):
        C.get_servers_input()
        mydrive(C)
        C.respond_to_server()        
    C.shutdown()


# #!/usr/bin/python
# import snakeoil3_gym
# if __name__ == "__main__":
#     Cs= [ snakeoil3_gym.Client(p=P) for P in [3101,3102,3103,3104] ]
#     for step in range(Cs[0].maxSteps,0,-1):
#         for C in Cs:
#             C.get_servers_input()
#             mydrive(C)
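
The cascaded speed checks above (shown in full in Example #20) amount to a threshold lookup; a compact equivalent sketch using the same speedX breakpoints:

def auto_gear(speed_x):
    # Return the gear for a given speedX, using the thresholds from the examples.
    # (threshold, gear) pairs; first match from the top wins.
    for threshold, gear in ((170, 6), (140, 5), (110, 4), (80, 3), (50, 2)):
        if speed_x > threshold:
            return gear
    return 1

# usage: R['gear'] = auto_gear(S['speedX'])
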
Example #13
def playGame(f_diagnostics, train_indicator, port=3101):    # 1 means Train, 0 means simply Run

	action_dim = 3  #Steering/Acceleration/Brake
	state_dim = 23+18  #Number of sensors input  #23 for simstar
	if(USE_SIMSTAR):
		env_name = 'Simstar_Env'
	else:
		env_name = 'Torcs_Env'
	
	agent = DDPG(env_name, state_dim, action_dim)

	# Generate a Torcs environment
	print("I have been asked to use port: ", port)
	if(USE_SIMSTAR):
		env = SimstarEnv(synronized_mode=True, speed_up=4, hz=5,
				add_agent=True, agent_set_speed=0, agent_rel_pos=35,
				autopilot_agent=OTHER_AGENT_AUTOPILOT)
		ob = env.reset()
	else:
		env = TorcsEnv(vision=False, throttle=True, gear_change=False)

		client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
		client.MAX_STEPS = np.inf

		client.get_servers_input(0)  # Get the initial input from torcs

		obs = client.S.d  # Get the current full-observation from torcs
		ob = env.make_observation(obs)

	s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,ob.opponents))
	state_other_vehicle = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,ob.opponents))

	EXPLORE = total_explore
	episode_count = max_eps
	max_steps = max_steps_eps
	epsilon = epsilon_start
	done = False
	epsilon_steady_state = 0.01 # This is used for early stopping.

	totalSteps = 0
	best_reward = -100000
	running_avg_reward = 0.

	print("TORCS Experiment Start.")
	for i in range(episode_count):

		save_indicator = 1
		early_stop = 1
		# Counting the total reward and total steps in the current episode
		total_reward = 0.
		info = {'termination_cause':0}
		distance_traversed = 0.
		speed_array=[]
		trackPos_array=[]

		print('\n\nStarting new episode...\n')

		for step in range(max_steps):

			# Take noisy actions during training
			if (train_indicator):
				epsilon -= 1.0 / EXPLORE
				epsilon = max(epsilon, epsilon_steady_state)
				a_t = agent.noise_action(s_t, epsilon)  # Take noisy actions during training
			else:
				a_t = agent.action(s_t)		# a_t is of the form: [steer, accel, brake]

			if(USE_SIMSTAR):
				if not OTHER_AGENT_AUTOPILOT:
					# control second vehicle 
					obs_other = env.get_agent_obs()
					state_other_vehicle = np.hstack((obs_other.angle, obs_other.track, obs_other.trackPos, obs_other.speedX, obs_other.speedY,obs_other.opponents))
					other_vehicle_action = agent.action(state_other_vehicle)
					env.set_agent_action(other_vehicle_action)
					
				ob, r_t, done, info = env.step(a_t)
			else:
				ob, r_t, done, info = env.step(step, client, a_t, early_stop)
			if done:
				break
			analyse_info(info, printing=False)

			s_t1 = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,ob.opponents))
			distance_traversed += ob.speedX*np.cos(ob.angle) #Assuming 1 step = 1 second
			speed_array.append(ob.speedX*np.cos(ob.angle))
			trackPos_array.append(ob.trackPos)


			#Checking for nan rewards: TODO: This was actually below the following block
			if (math.isnan( r_t )):
				r_t = 0.0
				for bad_r in range( 50 ):
					print( 'Bad Reward Found' )
				break #Introduced by Anirban


			# Add to replay buffer only if training
			if (train_indicator):
				agent.perceive(s_t,a_t,r_t,s_t1,done) # Add experience to replay buffer


			total_reward += r_t
			s_t = s_t1

			# Displaying progress every 15 steps.
			if ( (np.mod(step,15)==0) ):
			    print("Episode", i, "Step", step, "Epsilon", epsilon , "Action", a_t, "Reward", r_t )

			totalSteps += 1
			if done:
				break

		# Saving the best model.
		if ((save_indicator==1) and (train_indicator ==1 )):
			if (total_reward >= best_reward):
				print("Now we save model with reward " + str(total_reward) + " previous best reward was " + str(best_reward))
				best_reward = total_reward
				agent.saveNetwork()

			if np.mod(i, 20) == 0:
				print("***************************************************************************************************************************")
				agent.saveNetwork()

		running_avg_reward = running_average(running_avg_reward, i+1, total_reward)

		print("TOTAL REWARD @ " + str(i) +"-th Episode  : Num_Steps= " + str(step) + "; Max_steps= " + str(max_steps) +"; Reward= " + str(total_reward) +"; Running average reward= " + str(running_avg_reward))
		print("Total Step: " + str(totalSteps))
		print("")

		print(info)
		if(USE_SIMSTAR):
			ob= env.reset()
		else:
			if 'termination_cause' in info.keys() and info['termination_cause']=='hardReset':
				print('Hard reset by some agent')
				ob, client = env.reset(client=client)
			else:
				ob, client = env.reset(client=client, relaunch=True)
		s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,ob.opponents))

		# document_episode(i, distance_traversed, speed_array, trackPos_array, info, running_avg_reward, f_diagnostics)

	env.end()  # Shut down TORCS
	print("Finish.")
Example #14
def playGame(f_diagnostics,
             train_indicator,
             agent,
             port=3101):  # 1 means Train, 0 means simply Run

    action_dim = 3  #Steering/Acceleration/Brake
    state_dim = 65  # number of sensor inputs
    env_name = 'Torcs_Env'
    save_location = "./weights/"

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    env = TorcsEnv(vision=False, throttle=True, gear_change=False, main=1)
    ob = None
    while ob is None:
        try:
            client = snakeoil3.Client(p=port,
                                      vision=False)  # Open new UDP in vtorcs
            client.MAX_STEPS = np.inf
            client.get_servers_input(0)  # Get the initial input from torcs

            obs = client.S.d  # Get the current full-observation from torcs
            ob = env.make_observation(obs)

            s_t = np.hstack((ob.angle, ob.track, ob.trackPos, \
             ob.speedX, ob.speedY,  ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm, ob.opponents))
        except:
            pass

    EXPLORE = total_explore
    episode_count = max_eps
    max_steps = max_steps_eps
    epsilon = epsilon_start
    done = False
    epsilon_steady_state = 0.01  # This is used for early stopping.

    totalSteps = 0
    best_reward = -100000
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):

        save_indicator = 0
        early_stop = 1
        total_reward = 0.
        info = {'termination_cause': 0}
        distance_traversed = 0.
        speed_array = []
        trackPos_array = []

        print('\n\nStarting new episode...\n')
        print("Initial memory consumption: ")

        for step in range(max_steps):

            # Take noisy actions during training
            if (train_indicator == 1):
                epsilon -= 1.0 / EXPLORE
                epsilon = max(epsilon, epsilon_steady_state)
                a_t = agent.noise_action(
                    s_t, epsilon)  #Take noisy actions during training

            else:
                a_t = agent.action(s_t)

            try:
                ob, r_t, done, info = env.step(step, client, a_t, early_stop)
                if done:
                    break

                analyse_info(info, printing=False)

                s_t1 = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, \
                 ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm, ob.opponents))
                distance_traversed += ob.speedX * np.cos(
                    ob.angle)  #Assuming 1 step = 1 second

                if (math.isnan(r_t)):
                    r_t = 0.0
                    for bad_r in range(50):
                        print('Bad Reward Found')
                    break  #Introduced by Anirban

            # Add to replay buffer only if training
                if (train_indicator):
                    agent.perceive(s_t, a_t, r_t, s_t1,
                                   done)  # Add experience to replay buffer

            except Exception as e:
                print("Exception caught at port " + str(port) + ": " + str(e))
                ob = None
                while ob is None:
                    try:
                        client = snakeoil3.Client(
                            p=port, vision=False)  # Open new UDP in vtorcs
                        client.MAX_STEPS = np.inf
                        client.get_servers_input(
                            0)  # Get the initial input from torcs
                        obs = client.S.d  # Get the current full-observation from torcs
                        ob = env.make_observation(obs)
                    except:
                        pass
                    continue
            total_reward += r_t
            s_t = s_t1

            # Displaying progress every 15 steps.
            if ((np.mod(step, 15) == 0)):
                print("Episode", i, "Step", step, "Epsilon", epsilon, "Action",
                      a_t, "Reward", r_t)

            totalSteps += 1
            if done:
                break

        # Saving the best model.
        running_avg_reward = running_average(running_avg_reward, i + 1,
                                             total_reward)

        if train_indicator == 1:

            #Save network after every 20 episodes and store the data
            if np.mod(i, 20) == 0:
                agent.saveNetwork(i)

        #Saving training data for client for analysis
        if train_indicator == 1 and np.mod(i, 5) == 0:
            f1 = open(str(port) + ".csv", "a+")
            client.printAnalysis(f1, i)
            f1.close()


        print("TOTAL REWARD @ " + str(i) +"-th Episode  : Num_Steps= " + str(step) + "; Max_steps= " \
         + str(max_steps)  +"; Reward= " + str(total_reward) + \
          "; Running average reward= " + str(running_avg_reward))
        print("Total Step: " + str(totalSteps))
        print("")

        print(info)
        try:
            if 'termination_cause' in info.keys(
            ) and info['termination_cause'] == 'hardReset':
                print('Hard reset by some agent')
                ob, client = env.reset(client=client, relaunch=True)
            else:
                ob, client = env.reset(client=client, relaunch=True)
        except Exception as e:
            print("Exception caught at point B at port " + str(port) + ": " +
                  str(e))
            ob = None
            while ob is None:
                try:
                    client = snakeoil3.Client(
                        p=port, vision=False)  # Open new UDP in vtorcs
                    client.MAX_STEPS = np.inf
                    client.get_servers_input(
                        0)  # Get the initial input from torcs
                    obs = client.S.d  # Get the current full-observation from torcs
                    ob = env.make_observation(obs)
                except:
                    print("Exception caught at point C at port " + str(port) +
                          ": " + str(e))


        s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, \
         ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm, ob.opponents))

    env.end()  # This is for shutting down TORCS
    print("Finish.")
Example #15
    def __init__(self,
                 vision=False,
                 throttle=False,
                 gear_change=False,
                 display=False):
        self.vision = vision
        self.throttle = throttle
        self.gear_change = gear_change
        self.display = display
        self.initial_run = True

        print("launch torcs")
        os.system('pkill torcs')
        time.sleep(0.5)
        cur_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
        if self.vision is True:
            os.system('torcs -nofuel -nodamage -nolaptime -vision &')
        else:
            if self.display is True:
                os.system('torcs -nofuel -nolaptime &')
                time.sleep(0.5)
                os.system('sh autostart.sh')
            else:
                os.system('torcs -r ' + cur_dir +
                          '/race_config.xml -nofuel -nolaptime &')

        time.sleep(0.5)
        # Modify here if you use multiple tracks in the environment
        self.client = snakeoil3.Client(
            p=3001, vision=self.vision,
            display=self.display)  # Open new UDP in vtorcs
        self.client.MAX_STEPS = np.inf

        client = self.client
        client.get_servers_input()  # Get the initial input from torcs

        obs = client.S.d  # Get the current full-observation from torcs

        if throttle is False:
            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1, ))
        else:
            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2, ))

        if vision is False:
            high = np.array(
                [1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf])
            low = np.array(
                [0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf])
            self.observation_space = spaces.Box(low=low, high=high)
        else:
            high = np.array(
                [1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255])
            low = np.array(
                [0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0])
            self.observation_space = spaces.Box(low=low, high=high)
        self.prevSpeedX = 0
        self.prevAccX = 0
        self.prevJerkX = 0
        self.prevSnapX = 0

        self.prevSpeedY = 0
        self.prevAccY = 0
        self.prevJerkY = 0
        self.prevSnapY = 0

        self.past_t = 0
        self.past_d = 0
        self.KPH_to_MPS = 0.277778
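
The `prevSpeed*/prevAcc*/prevJerk*/prevSnap*` fields initialized at the end suggest the env repeatedly differentiates speed between steps. The update itself is not shown in the example, so the following is only an inference: a finite-difference chain sketch assuming a fixed step time `dt`:

def update_motion_derivatives(self, speed_x, dt):
    # Method sketch for the env above (inferred, not from the source):
    # finite-difference chain speed -> acceleration -> jerk -> snap along X.
    acc_x = (speed_x - self.prevSpeedX) / dt
    jerk_x = (acc_x - self.prevAccX) / dt
    snap_x = (jerk_x - self.prevJerkX) / dt
    self.prevSpeedX, self.prevAccX = speed_x, acc_x
    self.prevJerkX, self.prevSnapX = jerk_x, snap_x
    return acc_x, jerk_x, snap_x
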
Example #16
def playGame(f_diagnostics,
             train_indicator,
             port=3101):  #1 means Train, 0 means simply Run

    action_dim = 3  #Steering/Acceleration/Brake
    state_dim = 29  #Number of sensors input
    env_name = 'Torcs_Env'

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    env = TorcsEnv(vision=False, throttle=True, gear_change=False)

    client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
    client.MAX_STEPS = np.inf

    client.get_servers_input(0)  # Get the initial input from torcs

    obs = client.S.d  # Get the current full-observation from torcs
    ob = env.make_observation(obs)

    # EXPLORE = total_explore
    episode_count = max_eps
    max_steps = max_steps_eps
    epsilon = epsilon_start
    done = False
    # epsilon_steady_state = 0.01 # This is used for early stopping.

    totalSteps = 0
    best_reward = -100000
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):

        save_indicator = 0  # 1 to save the learned weights, 0 otherwise
        early_stop = 1
        total_reward = 0.
        info = {'termination_cause': 0}
        distance_traversed = 0.
        speed_array = []
        trackPos_array = []

        print('\n\nStarting new episode...\n')

        for step in range(max_steps):
            #Hard-coded steer=0, accel=1 and brake=0, define a_t as per any other algorithm
            a_t = np.asarray([0.0, 1.0, 0.0])  # [steer, accel, brake]

            ob, r_t, done, info = env.step(step, client, a_t, early_stop)
            if done:
                break
            analyse_info(info, printing=False)

            s_t1 = np.hstack(
                (ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                 ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm))
            distance_traversed += ob.speedX * np.cos(
                ob.angle)  #Assuming 1 step = 1 second
            speed_array.append(ob.speedX * np.cos(ob.angle))
            trackPos_array.append(ob.trackPos)

            #Checking for nan rewards: TODO: This was actually below the following block
            if (math.isnan(r_t)):
                r_t = 0.0
                for bad_r in range(50):
                    print("Bad Reward Found")
                break  #Introduced by Anirban

            total_reward += r_t
            s_t = s_t1

            # Displaying progress every 15 steps.
            if ((np.mod(step, 15) == 0)):
                print("Episode", i, "Step", step, "Epsilon", epsilon, "Action",
                      a_t, "Reward", r_t)

            totalSteps += 1
            if done:
                break

        # Saving the best model.
        if ((save_indicator == 1) and (train_indicator == 1)):
            if (total_reward >= best_reward):
                print("Now we save model with reward " + str(total_reward) +
                      " previous best reward was " + str(best_reward))
                best_reward = total_reward
                agent.saveNetwork()

        running_avg_reward = running_average(running_avg_reward, i + 1,
                                             total_reward)

        print("TOTAL REWARD @ " + str(i) + "-th Episode  : Num_Steps= " +
              str(step) + "; Max_steps= " + str(max_steps) + "; Reward= " +
              str(total_reward) + "; Running average reward= " +
              str(running_avg_reward))
        print("Total Step: " + str(totalSteps))
        print("")

        print(info)
        if 'termination_cause' in info.keys(
        ) and info['termination_cause'] == 'hardReset':
            print('\n\n***Hard reset by some agent***\n\n')
            ob, client = env.reset(client=client)
        else:
            ob, client = env.reset(client=client, relaunch=True)

        s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                         ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm))

        ##uncomment this to get some statistics per episode like total distance traversed, average speed, distance from center of track, etc
        # document_episode(i, distance_traversed, speed_array, trackPos_array, info, running_avg_reward, f_diagnostics)

    env.end()  # Shut down TORCS
    print("Finish.")
Example #17
def master():

    start_time = int(time.time())
    sprint("training", gamename)
    sprint("population", es.popsize)
    sprint("num_worker", num_worker)
    sprint("num_worker_trial", num_worker_trial)
    sys.stdout.flush()

    seeder = Seeder(seed_start)

    filename = filebase + '.json'
    filename_log = filebase + '.log.json'
    filename_hist = filebase + '.hist.json'
    filename_hist_best = filebase + '.hist_best.json'
    filename_best = filebase + '.best.json'

    port = 3001

    client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
    client.MAX_STEPS = np.inf
    client.get_servers_input(0)
    model.make_env(client)
    port = port + 1
    t = 0

    history = []
    history_best = []  # stores evaluation averages every 25 steps or so
    eval_log = []
    best_reward_eval = 0
    best_model_params_eval = None

    max_len = -1  # max time steps (-1 means ignore)

    while True:
        t += 1

        solutions = es.ask()

        if antithetic:
            seeds = seeder.next_batch(int(es.popsize / 2))
            seeds = seeds + seeds
        else:
            seeds = seeder.next_batch(es.popsize)

        packet_list = encode_solution_packets(seeds,
                                              solutions,
                                              max_len=max_len)

        send_packets_to_slaves(packet_list)
        reward_list_total = receive_packets_from_slaves()

        reward_list = reward_list_total[:, 0]  # get rewards

        mean_time_step = int(np.mean(reward_list_total[:, 1]) *
                             100) / 100.  # average time step
        max_time_step = int(np.max(reward_list_total[:, 1]) *
                            100) / 100.  # max time step
        avg_reward = int(
            np.mean(reward_list) * 100) / 100.  # average reward
        std_reward = int(
            np.std(reward_list) * 100) / 100.  # reward standard deviation

        es.tell(reward_list)

        es_solution = es.result()
        model_params = es_solution[0]  # best historical solution
        reward = es_solution[1]  # best reward
        curr_reward = es_solution[2]  # best of the current batch
        model.set_model_params(np.array(model_params).round(4))

        r_max = int(np.max(reward_list) * 100) / 100.
        r_min = int(np.min(reward_list) * 100) / 100.

        curr_time = int(time.time()) - start_time

        h = (t, curr_time, avg_reward, r_min, r_max, std_reward,
             int(es.rms_stdev() * 100000) / 100000., mean_time_step + 1.,
             int(max_time_step) + 1)

        if cap_time_mode:
            max_len = 2 * int(mean_time_step + 1.0)
        else:
            max_len = -1

        history.append(h)

        with open(filename, 'wt') as out:
            res = json.dump([np.array(es.current_param()).round(4).tolist()],
                            out,
                            sort_keys=True,
                            indent=2,
                            separators=(',', ': '))

        with open(filename_hist, 'wt') as out:
            res = json.dump(history,
                            out,
                            sort_keys=False,
                            indent=0,
                            separators=(',', ':'))

        sprint(gamename, h)

        if (t == 1):
            best_reward_eval = avg_reward
        if (t % eval_steps == 0):  # evaluate on actual task at hand

            prev_best_reward_eval = best_reward_eval
            model_params_quantized = np.array(es.current_param()).round(4)
            reward_eval = evaluate_batch(model_params_quantized, max_len=-1)
            model_params_quantized = model_params_quantized.tolist()
            improvement = reward_eval - best_reward_eval
            eval_log.append([t, reward_eval, model_params_quantized])
            with open(filename_log, 'wt') as out:
                res = json.dump(eval_log, out)
            if (len(eval_log) == 1 or reward_eval > best_reward_eval):
                best_reward_eval = reward_eval
                best_model_params_eval = model_params_quantized
            else:
                if retrain_mode:
                    sprint(
                        "reset to previous best params, where best_reward_eval =",
                        best_reward_eval)
                    es.set_mu(best_model_params_eval)
            with open(filename_best, 'wt') as out:
                res = json.dump([best_model_params_eval, best_reward_eval],
                                out,
                                sort_keys=True,
                                indent=0,
                                separators=(',', ': '))
            # dump history of best
            curr_time = int(time.time()) - start_time
            best_record = [
                t, curr_time, "improvement", improvement, "curr", reward_eval,
                "prev", prev_best_reward_eval, "best", best_reward_eval
            ]
            history_best.append(best_record)
            with open(filename_hist_best, 'wt') as out:
                res = json.dump(history_best,
                                out,
                                sort_keys=False,
                                indent=0,
                                separators=(',', ':'))

            sprint("Eval", t, curr_time, "improvement", improvement, "curr",
                   reward_eval, "prev", prev_best_reward_eval, "best",
                   best_reward_eval)
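
The recurring `int(x * 100) / 100.` idiom in this master loop cuts values to two decimals for logging; note that it truncates rather than rounds:

x = 3.149
print(int(x * 100) / 100.)  # 3.14 -- truncated toward zero
print(round(x, 2))          # 3.15 -- rounded; not the same thing
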
Example #18
def playGame(f_diagnostics,
             train_indicator,
             agent,
             port=3101):  # 1 means Train, 0 means simply Run

    action_dim = 3  #Steering/Acceleration/Brake
    state_dim = 65  # number of sensor inputs
    env_name = 'Torcs_Env'
    save_location = "./weights/"

    # Generate a Torcs environment
    print("I have been asked to use port: ", port)
    env = TorcsEnv(vision=False, throttle=True, gear_change=False, main=1)
    ob = None
    while ob is None:
        try:
            client = snakeoil3.Client(p=port,
                                      vision=False)  # Open new UDP in vtorcs
            client.MAX_STEPS = np.inf

            client.get_servers_input(0)  # Get the initial input from torcs
            obs = client.S.d  # Get the current full-observation from torcs
            ob = env.make_observation(obs)

        except:
            pass

    EXPLORE = total_explore
    episode_count = max_eps
    max_steps = max_steps_eps
    epsilon = epsilon_start
    done = False
    epsilon_steady_state = 0.01  # This is used for early stopping.

    totalSteps = 0
    best_reward = -100000
    running_avg_reward = 0.

    print("TORCS Experiment Start.")
    for i in range(episode_count):

        save_indicator = 0
        early_stop = 1
        total_reward = 0.
        info = {'termination_cause': 0}
        distance_traversed = 0.
        speed_array = []
        trackPos_array = []

        print('\n\nStarting new episode...\n')
        print("Initial memory consumption: ")
        for step in range(max_steps):

            # Take noisy actions during training
            try:
                client.get_servers_input(step)
                snakeoil3.drive_example(client)
                client.respond_to_server()

            except Exception as e:
                print("Exception caught at port " + str(port) + ": " + str(e))
                ob = None
                while ob is None:
                    try:
                        client = snakeoil3.Client(
                            p=port, vision=False)  # Open new UDP in vtorcs
                        client.MAX_STEPS = np.inf
                        client.get_servers_input(
                            0)  # Get the initial input from torcs
                        obs = client.S.d  # Get the current full-observation from torcs
                        ob = env.make_observation(obs)
                    except:
                        pass
                    continue

            if done:
                break

        print(info)
        try:
            if 'termination_cause' in info.keys(
            ) and info['termination_cause'] == 'hardReset':
                print('Hard reset by some agent')
                ob, client = env.reset(client=client, relaunch=True)
            else:
                ob, client = env.reset(client=client, relaunch=True)
        except Exception as e:
            print("Exception caught at point B at port " + str(port) + ": " +
                  str(e))
            ob = None
            while ob is None:
                try:
                    client = snakeoil3.Client(
                        p=port, vision=False)  # Open new UDP in vtorcs
                    client.MAX_STEPS = np.inf
                    client.get_servers_input(
                        0)  # Get the initial input from torcs
                    obs = client.S.d  # Get the current full-observation from torcs
                    ob = env.make_observation(obs)
                except:
                    print("Exception caught at point C at port " + str(port) +
                          ": " + str(e))

    env.end()  # This is for shutting down TORCS
    print("Finish.")
Example #19
MAX_TRIALS = 200  # just use this to extract one trial.

render_mode = False  # for debugging.

parser = argparse.ArgumentParser(
    description=('Train policy on OpenAI Gym environment '
                 'using pepg, ses, openes, ga, cma'))
parser.add_argument('--port', type=int, default=1, help='port')
args = parser.parse_args()

DIR_NAME = 'record'
if not os.path.exists(DIR_NAME):
    os.makedirs(DIR_NAME)

port = 3001 + args.port
client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
client.MAX_STEPS = np.inf

if port < 3006:
    time.sleep(5)

client.get_servers_input(0)  # Get the initial input from torcs

model = make_model(load_model=False)

total_frames = 0
model.make_env(client, render_mode=render_mode, full_episode=True)
#obs = client.S.d  # Get the current full-observation from torcs
#ob = env.make_observation(obs)
for trial in range(MAX_TRIALS):  # 200 trials per worker
    try:
Example #20
def mydrive(c,vel,ster,fi,ke):
	S,R= c.S.d,c.R.d
	target_speed=vel
	if ke <=6 :
		R['steer']= S['angle']*10 / snakeoil3_gym.PI
		R['steer']-= S['trackPos']*.10 
		print('ke',vel)
	else:
		R['steer']=ster 
	#print(S['xcod'],S['ycod'])
	if S['speedX'] < target_speed - (R['steer']*50):
		R['accel']+= .01
	else:
		R['accel']-= .01
	if S['speedX']<10:
		#R['accel']+= 1/(S['speedX']+.1)
		R['accel']+=0.02

	if ke == 9:
		pass

	if ((S['wheelSpinVel'][2]+S['wheelSpinVel'][3])-(S['wheelSpinVel'][0]+S['wheelSpinVel'][1]) > 5):
		R['accel']-= .2

	R['gear']=1
	if S['speedX']>50:
		R['gear']=2
	if S['speedX']>80:
		R['gear']=3
	if S['speedX']>110:
		R['gear']=4
	if S['speedX']>140:
		R['gear']=5
	if S['speedX']>170:
		R['gear']=6
	if(fi==9):	
		print(S['xcod'],S['ycod'],0,0,S['angle'],10,0)
		x0 = S['xcod']
		y0 = S['ycod']
		x_ob = 0
		y_ob = 0
		theta_in = S['angle']
		na = 10
		mela = 0
		ob=0
		for item in S['opponents']:
			if item < 30:
				x_ob = S['xcod']+item*cos(S['opponents'].index(item)*10*pi/180)
				y_ob = S['ycod']+item*sin(S['opponents'].index(item)*10*pi/180)
				ob=1
		return x0,y0,x_ob,y_ob,theta_in,na,mela,ob
	else:
		return 






if __name__ == "__main__":
	i=0

	if i==0:
		eng = matlab.engine.start_matlab()
		eng.launch_iros(85.3066,19.0348,0,0,0,10,1)
		eng.quit()
		velo = genfromtxt('vec.csv', delimiter=',')
		ster = genfromtxt('woc.csv', delimiter=',')

	ct =0 	
	C= snakeoil3_gym.Client(p=3101)
	for step in range(C.maxSteps,0,-1):
		C.get_servers_input()
		if (i==9):
			x0,y0,x_ob,y_ob,theta_in,na,mela,ob=mydrive(C,int(velo[i]),int(ster[i]),i,ct)
		else:
			mydrive(C,int(velo[i]),int(ster[i]),i,ct) 
		i+=1
		if (i==10):
			keyboard.press('p')
			eng = matlab.engine.start_matlab()
			eng.launch_iros(y0,x0,x_ob,y_ob,theta_in,na,mela,ob)
			eng.quit()
			velo = genfromtxt('vec.csv', delimiter=',')
			ster = genfromtxt('woc.csv', delimiter=',')
			i = 0
			ct+=1
		keyboard.release('p')
		C.respond_to_server()
		if ct == 10:
			break
	C.shutdown()


# #!/usr/bin/python
# import snakeoil3_gym
# if __name__ == "__main__":
#     Cs= [ snakeoil3_gym.Client(p=P) for P in [3101,3102,3103,3104] ]
#     for step in range(Cs[0].maxSteps,0,-1):
#         for C in Cs:
#             C.get_servers_input()
#             mydrive(C)
#             C.respond_to_server()
#     else:
#         for C in Cs: C.shutdown()
Example #21
        for worker in workers:  # worker threads
            #client = snakeoil3.Client(p=3101+i, vision=False)  # Open new UDP in vtorcs
            #client.MAX_STEPS = np.inf
            #i+=1
            t = threading.Thread(target=worker.work, args=())
            t.start()
            threads.append(t)
    # add a PPO updating thread
        threads.append(threading.Thread(target=GLOBAL_PPO.update, ))
        threads[-1].start()
        COORD.join(threads)
        #save_path = saver.save(sess, "./model.ckpt")
    else:
        # plot reward change and testing
        #plt.plot(np.arange(len(GLOBAL_RUNNING_R)), GLOBAL_RUNNING_R)
        #plt.xlabel('Episode'); plt.ylabel('Moving reward'); plt.ion(); plt.show()
        #env.set_fps(30)
        while True:
            client = snakeoil3.Client(p=3101, vision=False)
            ob, client = env.reset(client)
            for t in range(4000):
                s = np.hstack(
                    (ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                     ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm, ob.opponents))
                a = GLOBAL_PPO.choose_action(s)
                #a[1] = abs(a[1])
                #a[2] = abs(a[2])
                #env.render()
                ob, r, done, info = env.step(t, client, a)
                print("reward at current step " + str(r))
Example #22
 def __init__(self, wid):
     self.wid = wid
     self.ppo = GLOBAL_PPO
     #self.i = i
     self.client = snakeoil3.Client(p=3101 + self.wid, vision=False)
Example #23
    def work(self):

        best = 0

        self.client.MAX_STEPS = np.inf
        self.client.get_servers_input(0)  # Get the initial input from torcs
        obs = self.client.S.d  # Get the current full-observation from torcs
        ob = env.make_observation(obs)

        global GLOBAL_EP, GLOBAL_RUNNING_R, GLOBAL_UPDATE_COUNTER

        while not COORD.should_stop():
            try:
                ob, self.client = env.reset(self.client)
            except Exception as e:
                print("Exception caught in reset " +
                      str(traceback.format_exc()))
                while True:
                    try:
                        self.client = snakeoil3.Client(
                            p=3101 + self.wid,
                            vision=False)  # Open new UDP in vtorcs
                        self.client.MAX_STEPS = np.inf
                        self.client.get_servers_input(
                            0)  # Get the initial input from torcs
                        obs = self.client.S.d  # Get the current full-observation from torcs
                        ob = env.make_observation(obs)
                    except Exception as e2:
                        print("Exception caught in reset's exception " +
                              str(traceback.format_exc()))
                    else:
                        print("torcs client reconnected")
                        break
            ep_r = 0
            buffer_s, buffer_a, buffer_r = [], [], []

            for t in range(EP_LEN):
                if not ROLLING_EVENT.is_set():  # while global PPO is updating
                    ROLLING_EVENT.wait()  # wait until PPO is updated
                    buffer_s, buffer_a, buffer_r = [], [], []  # clear history buffer

                s = np.hstack(
                    (ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                     ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm, ob.opponents))

                a = self.ppo.choose_action(s)
                ETA = (1.0 - 2 * float(GLOBAL_EP / EP_MAX))
                #                if self.wid == 0:
                #                    ETA = -1.0
                a[0] += max(ETA, 0) * OU.function(a[0], 0.0, 0.8, 0.40)
                a[1] += max(ETA, 0) * OU.function(a[1], 0.5, 0.80, 0.10)
                a[2] += max(ETA, 0) * OU.function(a[2], -0.1, 1.00, 0.05)
                #a[2] = 0.
                #ETA = (1-2*GLOBAL_EP/EP_MAX)
                #a[1] = abs(a[1])
                #a[2] = abs(a[2])

                try:
                    ob, r, done, info = env.step(t, self.client, a)
                except Exception as e:
                    print("Exception caught in step " +
                          str(traceback.format_exc()))
                    while True:
                        try:
                            self.client = snakeoil3.Client(
                                p=3101 + self.wid,
                                vision=False)  # Open new UDP in vtorcs
                            self.client.MAX_STEPS = np.inf
                            self.client.get_servers_input(
                                0)  # Get the initial input from torcs
                            obs = self.client.S.d  # Get the current full-observation from torcs
                            print(ob)
                            ob = env.make_observation(obs)
                        except Exception as e2:
                            print("Exception caught in reset's exception " +
                                  str(traceback.format_exc()))
                        else:
                            print("torcs client reconnected")
                            break
                    continue

                print("Episode: " + str(GLOBAL_EP) + " Step: " + str(t) +
                      " Action: " + str(a) + "Reward: " + str(r))

                s_ = np.hstack(
                    (ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,
                     ob.speedZ, ob.wheelSpinVel / 100.0, ob.rpm, ob.opponents))

                #s_, r, done = env.step(a)
                buffer_s.append(s)
                buffer_a.append(a)
                buffer_r.append(r)  # normalize reward, found to be useful

                s = s_
                ep_r += r

                GLOBAL_UPDATE_COUNTER += 1  # count to minimum batch size
                if t == EP_LEN - 1 or GLOBAL_UPDATE_COUNTER >= MIN_BATCH_SIZE:
                    v_s_ = self.ppo.get_v(s_)
                    discounted_r = []  # compute discounted reward
                    for r in buffer_r[::-1]:
                        v_s_ = r + GAMMA * v_s_
                        discounted_r.append(v_s_)
                    discounted_r.reverse()
                    bs, ba, br = np.vstack(buffer_s), np.vstack(
                        buffer_a), np.array(discounted_r)[:, np.newaxis]
                    buffer_s, buffer_a, buffer_r = [], [], []
                    QUEUE.put(np.hstack((bs, ba, br)))
                    if GLOBAL_UPDATE_COUNTER >= MIN_BATCH_SIZE:
                        ROLLING_EVENT.clear()  # stop collecting data
                        UPDATE_EVENT.set()  # globalPPO update

                    if GLOBAL_EP >= EP_MAX:  # stop training
                        COORD.request_stop()
                        break
                if done:
                    break
            if (GLOBAL_EP % 10 == 0 and ep_r > best):
                best = ep_r
                ckpt_path = os.path.join('./weights_new/' + '%i' % GLOBAL_EP,
                                         'PPO.ckpt')
                save_path = GLOBAL_PPO.saver.save(GLOBAL_PPO.sess,
                                                  ckpt_path,
                                                  write_meta_graph=False)
            # record reward changes, plot later
            if self.wid == 0:
                Episode_reward.append(ep_r)
                np.savetxt('episode_reward.txt', Episode_reward)
            if len(GLOBAL_RUNNING_R) == 0: GLOBAL_RUNNING_R.append(ep_r)
            else:
                GLOBAL_RUNNING_R.append(GLOBAL_RUNNING_R[-1] * 0.9 +
                                        ep_r * 0.1)
            GLOBAL_EP += 1
            print('{0:.1f}%'.format(GLOBAL_EP / EP_MAX * 100),
                  '|W%i' % self.wid, '|Ep_r: %.2f' % ep_r,
                  '\t |Best Ep_r: %.2f' % best, '\t |Epsilon: %.4f' % ETA)
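
The reversed loop near the end of `work` computes bootstrapped discounted returns: starting from the critic's value of the state after the last step, each stored reward folds in as `v = r + GAMMA * v`. A tiny worked example with the numbers spelled out:

GAMMA = 0.9
buffer_r = [1.0, 0.0, 2.0]   # rewards for t = 0, 1, 2
v_last = 10.0                # critic's value of the state after t = 2

discounted = []
v = v_last
for r in reversed(buffer_r): # walk backwards: t = 2, 1, 0
    v = r + GAMMA * v        # t=2: 11.0, t=1: 9.9, t=0: 9.91
    discounted.append(v)
discounted.reverse()         # [9.91, 9.9, 11.0] = returns for t = 0, 1, 2
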
Example #24
def playGame(f_diagnostics, train_indicator, port=3101):    # 1 means Train, 0 means simply Run
	
	action_dim = 3  #Steering/Acceleration/Brake
	state_dim = 29  #of sensors input
	env_name = 'Torcs_Env'
	agent = DDPG(env_name, state_dim, action_dim)

	# Generate a Torcs environment
	print("I have been asked to use port: ", port)
	env = TorcsEnv(vision=False, throttle=True, gear_change=False) 
	
	client = snakeoil3.Client(p=port, vision=False)  # Open new UDP in vtorcs
	client.MAX_STEPS = np.inf

	client.get_servers_input(0)  # Get the initial input from torcs

	obs = client.S.d  # Get the current full-observation from torcs
	ob = env.make_observation(obs)

	s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,  ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))

	EXPLORE = total_explore
	episode_count = max_eps
	max_steps = max_steps_eps
	epsilon = epsilon_start
	done = False
	epsilon_steady_state = 0.01 # This is used for early stopping.
 
	totalSteps = 0
	best_reward = -100000
	running_avg_reward = 0.

	print("TORCS Experiment Start.")
	for i in range(episode_count):

		save_indicator = 0
			
		# env.reset(client=client, relaunch=True)	
		# random_number = random.random()
		# eps_early = max(epsilon,epsilon_steady_state) #At least 0.01 
		# if (random_number < (1.0-eps_early)) and (train_indicator == 1): #During training, at most 99% of the time, early stopping would be engaged 
		#     early_stop = 1
		# else: 
		#     early_stop = 0
		early_stop = 1
		# print("Episode : " + str(i) + " Replay Buffer " + str(agent.replay_buffer.count()) + ' Early Stopping: ' + str(early_stop) +  ' Epsilon: ' + str(eps_early) +  ' RN: ' + str(random_number)  )

		#Initializing the first state
		# s_t = np.hstack((ob['angle'], ob['track'], ob['trackPos'], ob['speedX'], ob['speedY'],  ob['speedZ'], ob['wheelSpinVel']/100.0, ob['rpm']))
		# s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,  ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))
		# Counting the total reward and total steps in the current episode
		total_reward = 0.
		info = {'termination_cause':0}
		distance_traversed = 0.
		speed_array=[]
		trackPos_array=[]
		
		print('\n\nStarting new episode...\n')

		for step in range(max_steps):

			# Take noisy actions during training
			if (train_indicator):
			    epsilon -= 1.0 / EXPLORE
			    epsilon = max(epsilon, epsilon_steady_state) 
			    a_t = agent.noise_action(s_t,epsilon) #Take noisy actions during training
			else:
			    a_t = agent.action(s_t)
			# a_t = np.asarray([0.0, 1.0, 0.0])		# [steer, accel, brake]

			ob, r_t, done, info = env.step(step, client, a_t, early_stop)
			if done:
				break
			# print done
			# print 'Action taken'
			analyse_info(info, printing=False)

			s_t1 = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,  ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))
			distance_traversed += ob.speedX*np.cos(ob.angle) #Assuming 1 step = 1 second
			speed_array.append(ob.speedX*np.cos(ob.angle))
			trackPos_array.append(ob.trackPos)


			#Checking for nan rewards: TODO: This was actually below the following block
			if (math.isnan( r_t )):
				r_t = 0.0
				for bad_r in range( 50 ):
					print( 'Bad Reward Found' )
				break #Introduced by Anirban


			# Add to replay buffer only if training
			if (train_indicator):
				agent.perceive(s_t,a_t,r_t,s_t1,done) # Add experience to replay buffer


			total_reward += r_t
			s_t = s_t1

			# Displaying progress every 15 steps.
			if ( (np.mod(step,15)==0) ):        
			    print("Episode", i, "Step", step, "Epsilon", epsilon , "Action", a_t, "Reward", r_t )

			totalSteps += 1
			if done:
				break

		# Saving the best model.
		if ((save_indicator==1) and (train_indicator ==1 )):
			if (total_reward >= best_reward):
				print("Now we save model with reward " + str(total_reward) + " previous best reward was " + str(best_reward))
				best_reward = total_reward
				agent.saveNetwork()     
	
		running_avg_reward = running_average(running_avg_reward, i+1, total_reward)  


		print("TOTAL REWARD @ " + str(i) +"-th Episode  : Num_Steps= " + str(step) + "; Max_steps= " + str(max_steps) +"; Reward= " + str(total_reward) +"; Running average reward= " + str(running_avg_reward))
		print("Total Step: " + str(totalSteps))
		print("")

		print(info)
		if 'termination_cause' in info.keys() and info['termination_cause']=='hardReset':
			print('Hard reset by some agent')
			ob, client = env.reset(client=client)
		else:
			ob, client = env.reset(client=client, relaunch=True)
		s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,  ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))

		# document_episode(i, distance_traversed, speed_array, trackPos_array, info, running_avg_reward, f_diagnostics)

	env.end()  # This is for shutting down TORCS
	print("Finish.")
Example #25

#------------------------------------------------------------------------------------------------------------------#

if __name__ == "__main__":
	i=0

	if i==0:
		eng = matlab.engine.start_matlab()
		eng.launch_iros(249.44,174.5,240,173.5,0,10,0,1)
		eng.quit()
		velo = genfromtxt('vec.csv', delimiter=',')
		ster = genfromtxt('woc.csv', delimiter=',')

	ct =0
	Cs= [ snakeoil3_gym.Client(p=P) for P in [3101,3102] ]
	for step in range(Cs[0].maxSteps,0,-1):
		Cs[0].get_servers_input()
		Cs[1].get_servers_input()
		if (i==9):
			x0,y0,x_ob,y_ob,theta_in,na,mela,ob=mydrive(Cs[0],int(velo[i]),int(ster[i]),i,ct)
		else:
			mydrive(Cs[0],int(velo[i]),int(ster[i]),i,ct) 

		i+=1
		if (i==10):
			keyboard.press('p')
			eng = matlab.engine.start_matlab()
			eng.launch_iros(x0,y0,x_ob,y_ob,theta_in,na,mela,ob)
			eng.quit()
			velo = genfromtxt('vec.csv', delimiter=',')