def env_start(self):
		"""
		Start a new episode and return the initial observation.

		Runs the route generator script, launches SUMO with
		self.sumoConfig, connects TraCI on self.traciPORT, builds a
		fresh State for junction "1" and resets the per-episode
		bookkeeping attributes.

		Returns an Observation whose intArray is the flattened
		self.state.carState.
		"""
		# Randomly generate new routes. Wait for the generator to exit
		# before launching SUMO; otherwise both run concurrently and SUMO
		# may start while the routes are still being (re)written.
		routeGenProcess = subprocess.Popen("python %s" % (self.routeScript), shell=True, stdout=sys.stdout)
		routeGenProcess.wait()

		# Start SUMO
		sumoProcess = subprocess.Popen("%s -c %s --no-warnings" % (self.sumoBinary, self.sumoConfig), shell=True, stdout=sys.stdout)

		traci.init(self.traciPORT)
		self.state = State("1")

		# Reset these variables when episodes starts
		self.vehicleDict = {}
		self.currentVehList = []
		self.previousVehList = []
		self.totalCumWaitingTime = 0
		# Previous-step speeds must not leak across episodes: a stale
		# entry for a reused vehicle id would show up as a large speed
		# drop the first time that id is seen again.
		self.speedDict = defaultdict(float)

		returnObs = Observation()
		returnObs.intArray = self.state.carState.flatten()
		self.simStep = 1

		return returnObs
Example #2
0
	def run(self, save=False):
		"""
		Run one SUMO episode with a uniformly random stage choice at
		every step, collect evaluation statistics and plot them.

		save -- when true, pickle the collected statistics (route travel
			times, waiting counts, stage durations, action frequency and
			emergency stop counts) to 'eval_1D_LINEAR_T.pkl'.

		When self.tsne_dataset is set, the output of
		self.get_activations() and the summed Q-values are accumulated
		on self.activation_X / self.label_list for every step, a
		screenshot is taken each step, and both are pickled under
		'tsne/' once the episode is over.
		"""
		# Randomly generate new routes. Wait for the generator to exit
		# before launching SUMO; otherwise both run concurrently and SUMO
		# may start while the routes are still being (re)written.
		routeGenProcess = subprocess.Popen("python %s" % (self.routeScript), shell=True, stdout=sys.stdout)
		routeGenProcess.wait()

		# Start SUMO
		sumoProcess = subprocess.Popen("%s -c %s" % (self.sumoBinary, self.sumoConfig), shell=True, stdout=sys.stdout)
		traci.init(self.PORT)

		state = State("1")
		# NOTE(review): state1d is never read below; the constructor call
		# is kept in case it has side effects -- confirm and remove.
		state1d = State1D("1")

		# Per-episode bookkeeping
		# vehicle id -> route id, for vehicles currently in the network
		mapCarToRoute = {}
		# route id -> travel times (in steps) of vehicles that have left
		routeDict = defaultdict(list)
		# vehicle id -> number of steps the vehicle has been seen so far
		traveltimeDict = defaultdict(int)
		# stage index -> lengths of its consecutive active runs
		traffic_light_dict = defaultdict(list)
		# vehicle id -> speed observed at the previous step
		speedDict = defaultdict(float)
		actionList = []
		wait_count_list = []
		emergency_stop_list = []
		previous_index = 0
		# Starts at 0 so the first run is counted like every later one
		# (each step of a run, including its first, adds exactly 1).
		traffic_light_counter = 0
		freeflow_dict = {'eastSouth':42, 'eastWest':42, 'eastNorth':41, \
			'westEast':42, 'westSouth':41, 'westNorth':42, \
			'southEast':41, 'southWest':42, 'southNorth':42, \
			'northEast':42, 'northWest':41, 'northSouth':42}

		if self.gui:
			self.view = traci.gui.getIDList()[0]

		step = 0
		# run simulation until it reaches a terminal state
		while step == 0 or traci.simulation.getMinExpectedNumber() > 0:
			# The stage is drawn at random; the car state is not consulted.
			stageIndex = random.choice(range(len(self.Stages)))

			actionList.append(stageIndex)
			traci.trafficlights.setRedYellowGreenState("1", self.Stages[stageIndex])

			# Count time a specific stage index has been active
			if step == 0:
				previous_index = stageIndex
			if stageIndex == previous_index:
				traffic_light_counter += 1
			else:
				traffic_light_dict[previous_index].append(traffic_light_counter)
				traffic_light_counter = 1

			# Take a step
			traci.simulationStep()

			currentVehList = traci.vehicle.getIDList()
			state.updateState(currentVehList)

			# Collect activations and labels for the t-SNE data set
			if self.tsne_dataset:
				# Get high level representation
				o = state.carState
				reshaped_o = self.reshape_output(o)
				self.states_shared.set_value(reshaped_o)
				activations = self.get_activations()
				# Add activations to array
				if hasattr(self, 'activation_X'):
					self.activation_X = np.vstack((self.activation_X, \
						activations))
				else:
					self.activation_X = activations

				# Get value/label
				q_val_outputs = self.network.q_vals(o)
				activation_labels = sum(q_val_outputs)
				self.label_list.append(activation_labels)

				# Save a screenshot for every step.
				# NOTE(review): self.view is only assigned above when
				# self.gui is true -- confirm tsne_dataset implies gui.
				traci.gui.screenshot(self.view, 'tsne/screenshots/step_'+'{}'.format(step)+'.png')

			# Single pass over the vehicles: waiting count, speed
			# shortfall, emergency stops and travel time. Each speed is
			# requested from TraCI once and reused.
			cumulative_speed_diff = 0
			wait_count = 0
			total_deceleration = 0
			es_count = 0
			for car in currentVehList:
				speed = traci.vehicle.getSpeed(car)

				if traci.vehicle.getWaitingTime(car) > 0:
					wait_count += 1
				cumulative_speed_diff += traci.vehicle.getAllowedSpeed(car) - speed

				# A drop of more than 4.5 since the previous step counts
				# as an emergency stop
				a = speed - speedDict[car]
				if a < -4.5:
					es_count += 1
					total_deceleration += a
				speedDict[car] = speed

				# Increment traveltime, remembering the route on first sight
				if car not in mapCarToRoute:
					mapCarToRoute[car] = traci.vehicle.getRouteID(car)
				traveltimeDict[car] += 1
			wait_count_list.append(wait_count)
			emergency_stop_list.append(es_count)

			# Calculate reward
			result = -cumulative_speed_diff + 100 * total_deceleration
			print("Reward: {}".format(result))

			# Move the travel time of every vehicle that has left the
			# network into routeDict. Iterate over a snapshot because
			# entries are removed inside the loop.
			present = set(currentVehList)
			for car in list(traveltimeDict):
				if car not in present:
					route_id = mapCarToRoute.pop(car)
					routeDict[route_id].append(traveltimeDict.pop(car))

			previous_index = stageIndex

			step += 1

		# Record the run that was still active when the episode ended;
		# inside the loop a run is only stored when the stage changes.
		traffic_light_dict[previous_index].append(traffic_light_counter)

		delay_dict = self.calculate_delay_dict(freeflow_dict, routeDict)

		# Produce plots
		self.plot_route_dist(routeDict)
		self.plot_actions(actionList)
		self.plot_route_diagnostics(routeDict)
		self.plot_wait_count(wait_count_list)
		self.plot_travel_time_dist(routeDict)
		self.plot_traffic_light_time(traffic_light_dict)
		self.plot_emergency_count(emergency_stop_list)
		print(traffic_light_dict)

		action_frequency = self.action_freq(actionList, step)
		print("Action frequency: {}".format(action_frequency))

		print("Mean delay: {}".format(self.calculate_mean_delay(delay_dict)))

		if save:
			with open('eval_1D_LINEAR_T.pkl', 'wb') as f:
				cPickle.dump([routeDict, wait_count_list, \
					traffic_light_dict, action_frequency, \
					emergency_stop_list], f)

		if self.tsne_dataset:
			if not traci.simulation.getMinExpectedNumber() > 0:
				# Pickle
				time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
				with open('tsne/activations_X'+ time_str +'.pkl', 'wb') as f:
					cPickle.dump(self.activation_X, f, -1)

				with open('tsne/activations_Y'+ time_str +'.pkl', 'wb') as f:
					temp = np.asarray(self.label_list)
					cPickle.dump(temp, f)


		traci.close()
		sys.stdout.flush()
class SumoEnvironment(Environment):
	"""docstring for SumoEnvironment"""
	def __init__(self, traffic_situation):
		super(SumoEnvironment, self).__init__()

		if traffic_situation == 'simpleT':
			# Actions for SimpleT
			self.stage01="GGgrrrrGGG"
			self.inter0102="GGgrrrryyy"
			self.stage02="GGGrrrrrrr"
			self.inter0203="yyyrrrrrrr"
			self.stage03="rrrGGGGrrr"
			self.inter0301="rrryyyyrrr"

			# self.Stages=[self.stage01, self.stage02, self.stage03];
			self.Stages = [self.stage01, self.inter0102, self.stage02, \
				self.inter0203, self.stage03, self.inter0301]
			self.sumoConfig = "simulation/SimpleT/simpleT.sumocfg"
			self.routeScript = "simulation/SimpleT/routeGenerator.py"

		elif traffic_situation == 'simpleX':
			# Actions for SimpleX
			self.stage01="GGGGggrrrrrrGGGGggrrrrrr"
			self.inter0102="yyyyggrrrrrryyyyggrrrrrr"
			self.stage02="rrrrGGrrrrrrrrrrGGrrrrrr"
			self.inter0203="rrrryyrrrrrrrrrryyrrrrrr"
			self.stage03="rrrrrrGGGGggrrrrrrGGGGgg"
			self.inter0304="rrrrrryyyyggrrrrrryyyygg"
			self.stage04="rrrrrrrrrrGGrrrrrrrrrrGG"
			self.inter0401="rrrrrrrrrryyrrrrrrrrrryy"

			self.Stages=[self.stage01, self.stage02, self.stage03, self.stage04];
			self.sumoConfig = "simulation/SimpleX/simpleX.sumocfg"
			self.routeScript = "simulation/SimpleX/routeGenerator.py"

		self.sumoBinary = "sumo"

		self.vehicleDict = {}
		self.currentVehList = []
		self.previousVehList = []
		self.totalCumWaitingTime = 0
		self.speedDict = defaultdict(float)
		self.licycle = itertools.cycle(range(len(self.Stages)))
		self.stageIndex = 0 # Initialise stage index

		self.traciPORT = 8813

	def env_init(self):
		"""
		Return the RL-Glue 3.0 task specification string.

		The spec advertises a fixed action range of (0 1) rather than
		one action per entry of self.Stages.
		"""
		task_spec = (
			"VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1.0 "
			"OBSERVATIONS INTS (0 1)  "
			"ACTIONS INTS (0 1)  "
			"REWARDS (-1.0 1.0)  "
			"EXTRA rl_glue_sumo_environment(Python) by Tobias Rijken."
		)
		return task_spec

	def env_start(self):
		"""
		Start a new episode and return the initial observation.

		Runs the route generator script, launches SUMO with
		self.sumoConfig, connects TraCI on self.traciPORT, builds a
		fresh State for junction "1" and resets the per-episode
		bookkeeping attributes.

		Returns an Observation whose intArray is the flattened
		self.state.carState.
		"""
		# Randomly generate new routes. Wait for the generator to exit
		# before launching SUMO; otherwise both run concurrently and SUMO
		# may start while the routes are still being (re)written.
		routeGenProcess = subprocess.Popen("python %s" % (self.routeScript), shell=True, stdout=sys.stdout)
		routeGenProcess.wait()

		# Start SUMO
		sumoProcess = subprocess.Popen("%s -c %s --no-warnings" % (self.sumoBinary, self.sumoConfig), shell=True, stdout=sys.stdout)

		traci.init(self.traciPORT)
		self.state = State("1")

		# Reset these variables when episodes starts
		self.vehicleDict = {}
		self.currentVehList = []
		self.previousVehList = []
		self.totalCumWaitingTime = 0
		# Previous-step speeds must not leak across episodes: a stale
		# entry for a reused vehicle id would show up as a large speed
		# drop the first time that id is seen again.
		self.speedDict = defaultdict(float)

		returnObs = Observation()
		returnObs.intArray = self.state.carState.flatten()
		self.simStep = 1

		return returnObs

	def env_step(self, thisAction):
		"""
		Apply the agent's action, advance SUMO by one step and report
		the outcome.

		thisAction -- action whose intArray[0] is 0 to move on to the
			next entry of self.Stages (cycling), or any other value to
			keep the current stage.

		Returns a Reward_observation_terminal carrying the reward from
		calculate_reward(), an Observation with the flattened car state,
		and terminal set to 1 when
		traci.simulation.getMinExpectedNumber() reports 0.
		"""
		# Action 0 advances the stage cycle; anything else holds.
		# The next() builtin works on Python 2.6+ and Python 3, unlike
		# the iterator's Python-2-only .next() method.
		if thisAction.intArray[0] == 0:
			self.stageIndex = next(self.licycle)
		traci.trafficlights.setRedYellowGreenState("1", self.Stages[self.stageIndex])

		traci.simulationStep()
		self.simStep += 1

		self.currentVehList = traci.vehicle.getIDList()
		self.state.updateState(self.currentVehList)

		# Check if state is terminal
		episodeTerminal = 0
		if traci.simulation.getMinExpectedNumber() == 0:
			episodeTerminal = 1

		theObs = Observation()
		theObs.intArray = self.state.carState.flatten()

		returnRO = Reward_observation_terminal()
		returnRO.r = self.calculate_reward()
		returnRO.o = theObs
		returnRO.terminal = episodeTerminal

		killedVehicles = checkVehKill(self.vehicleDict)
		for vehicle in killedVehicles:
			del self.vehicleDict[vehicle]

		self.previousVehList = self.currentVehList

		# Close the connection last, so that nothing above can end up
		# talking to TraCI after it has been shut down.
		if episodeTerminal:
			traci.close()

		return returnRO

	def env_cleanup(self):
		"""
		Do nothing; this environment has no cleanup work here.
		"""
		return None

	def env_message(self, in_message):
		"""
		Handle a message sent by the experiment program.

		Messages starting with "episode_end", "finish_epoch" or
		"finish_testing" close the TraCI connection, so that the next
		episode can start SUMO again without a second session listening
		on the same port. This is needed because agent_end is not
		called when an episode runs out of steps.

		"start_testing" is accepted and ignored. Any other message
		yields an explanatory reply string; recognised messages
		return None.
		"""
		closing_prefixes = ("episode_end", "finish_epoch", "finish_testing")

		if in_message.startswith(closing_prefixes):
			traci.close()
			return None

		if in_message.startswith("start_testing"):
			return None

		return "I don't know how to respond to your message"

	def calculate_delay(self):
		"""
		Return the negated waiting-time increment for this step.

		A VehicleTimer is registered in self.vehicleDict for every
		vehicle reported by checkVehBirth(); every timer is then asked
		for its increment via incrementWaitingTime(). The negated sum
		is added to self.totalCumWaitingTime and returned, so the
		result can be used directly as a reward.
		"""
		# Register a timer for each vehicle that checkVehBirth() reports
		for newcomer in checkVehBirth(self.currentVehList, self.previousVehList):
			self.vehicleDict[newcomer] = VehicleTimer(newcomer)

		# Add up the increments reported by all tracked vehicles
		step_delay = 0
		for timer in self.vehicleDict.values():
			step_delay += timer.incrementWaitingTime()

		# Return negative reward
		penalty = -step_delay
		self.totalCumWaitingTime += penalty
		return penalty

	def calculate_speed_diff(self):
		"""
		Returns the cumulative speed difference between the allowed speed
		and the car's speed for every car
		"""
		cumulative_speed_diff = 0
		for car in self.currentVehList:
			speed_diff = traci.vehicle.getAllowedSpeed(car) - traci.vehicle.getSpeed(car)
			cumulative_speed_diff += speed_diff
		return -cumulative_speed_diff

	def identify_emergency_stop(self):
		"""
		Identifies if an emergency stop occurs and sums the decelerations
		of all the cars that make an emergency stop
		"""
		total_deceleration = 0
		for vehicle in self.currentVehList:
			a = traci.vehicle.getSpeed(vehicle) - self.speedDict[vehicle]
			if a < -4.5:
				total_deceleration += a
			self.speedDict[vehicle] = traci.vehicle.getSpeed(vehicle)
		return total_deceleration

	def calculate_reward(self, tau=100):
		"""
		Return a weighted sum of the speed diff reward and the emergency
		stop reward
		"""
		result = self.calculate_speed_diff() + \
			tau * self.identify_emergency_stop()
		return result
	def run(self, commands):
		# Randomly generate new routes
		# routeGenProcess = subprocess.Popen("python %s" % (self.routeScript), shell=True, stdout=sys.stdout)

		# Start SUMO
		sumoProcess = subprocess.Popen("%s -c %s" % (self.sumoBinary, self.sumoConfig), shell=True, stdout=sys.stdout)
		traci.init(self.PORT)

		state = State("1")

		# Reset these variables when episodes starts
		mapCarToRoute = {}
		routeDict = defaultdict(list)
		traveltimeDict = defaultdict(int)
		traffic_light_dict = defaultdict(list)
		vehicleDict = {}
		currentVehList = []
		previousVehList = []
		totalCumWaitingTime = 0
		actionList = []
		wait_count_list = []
		previous_index = 0
		traffic_light_counter = 1
		speedDict = defaultdict(float)
		emergency_stop_list = []

		step = 0
		self.last_action = 0
		# run simulation until it reaches a terminal state
		while step == 0 or traci.simulation.getMinExpectedNumber() > 0:
			
			if step == 0:
				observation = state.carState.flatten()

			# TAKE THE HUMAN ACTION HERE
			# print "1"
			try:
				stageIndex = int(commands.get(False))
				# print "2"
			except Queue.Empty, e:
				stageIndex = self.last_action
				# print "3"
			print "Index: {}".format(stageIndex)

			if stageIndex not in self.admissable_index:
				print "admissable_index: {}".format(self.admissable_index)
				stageIndex = self.last_action
			self.last_action = stageIndex
			# print "4"

			# print "Index: {}".format(stageIndex)
			actionList.append(stageIndex)
			# print "stageIndex: {}".format(stageIndex)
			traci.trafficlights.setRedYellowGreenState("1", self.Stages[stageIndex])

			# Count time a specific stage index has been active
			if step == 0:
				previous_index = stageIndex
			if stageIndex == previous_index:
				traffic_light_counter += 1
			else:
				traffic_light_dict[previous_index].append(traffic_light_counter)
				traffic_light_counter = 1

			traci.simulationStep()

			currentVehList = traci.vehicle.getIDList()
			state.updateState(currentVehList)

			observation = state.carState.flatten()

			# Increment wait count
			wait_count = 0
			for car in currentVehList:
				if traci.vehicle.getWaitingTime(car) > 0:
					wait_count += 1
			wait_count_list.append(wait_count)

			# Detect emergency stop
			es_count = 0
			for vehicle in currentVehList:
				a = traci.vehicle.getSpeed(vehicle) - speedDict[vehicle]
				if a < -4.5:
					es_count += 1
				speedDict[vehicle] = traci.vehicle.getSpeed(vehicle)
			emergency_stop_list.append(es_count)

			# Increment traveltime for all cars
			for car in currentVehList:
				if car not in mapCarToRoute.keys():
					mapCarToRoute[car] = traci.vehicle.getRouteID(car)
				traveltimeDict[car] += 1

			# Add traveltime to routeDict, then delete entry from
			# the traveltimeDict
			for car in traveltimeDict.keys():
				if car not in currentVehList:
					route_id = mapCarToRoute[car]
					routeDict[route_id].append(traveltimeDict[car])
					del traveltimeDict[car]
					del mapCarToRoute[car]

			previousVehList = currentVehList
			previous_index = stageIndex
			
			step += 1