# rl_glue_sumo_environment_1D.py
# executable file, 201 lines (159 loc), 6.15 KB
"""
This uses the skeleton_environment.py file from the Python-codec of rl-glue
as a starting point.
The class implements an environment based on the rl-glue framework and the
Simulation of Urban MObility (SUMO).
Author: Tobias Rijken
"""
import random
import sys
from rlglue.environment.Environment import Environment
from rlglue.environment import EnvironmentLoader as EnvironmentLoader
from rlglue.types import Observation
from rlglue.types import Action
from rlglue.types import Reward_observation_terminal
import subprocess
# Symlinked ~/Dropbox/UCL/MSc\ Project/TrafficControl/sumo to /usr/share/
sys.path.append('/usr/share/sumo/tools')
import traci
from sumo_utils import VehicleTimer, checkVehBirth, checkVehKill
import matplotlib.pyplot as plt
# from simulation.SimpleT.statespace import State1D as State
from simulation.SimpleX.statespace import State1D as State
class SumoEnvironment(Environment):
    """RL-Glue environment backed by a SUMO traffic simulation.

    Each action is an index into ``self.Stages``; the selected signal-state
    string is applied to traffic light "1" and the simulation advances by one
    step.  The observation is ``laneState.sum(axis=1)`` of the ``State``
    object and the reward is the negated cumulative difference between each
    vehicle's allowed speed and its current speed.
    """

    def __init__(self, traffic_situation):
        """Set up signal stages and SUMO file paths for a scenario.

        Args:
            traffic_situation: Scenario name, either 'simpleT' or 'simpleX'.

        Raises:
            ValueError: If ``traffic_situation`` is not a known scenario.
                Failing here avoids a later AttributeError on ``self.Stages``
                or ``self.sumoConfig``.
        """
        super(SumoEnvironment, self).__init__()
        if traffic_situation == 'simpleT':
            # Signal-state strings for SimpleT.  The inter* strings are
            # defined but are not part of the selectable Stages list.
            self.stage01 = "GGgrrrrGGG"
            self.inter0102 = "GGgrrrryyy"
            self.stage02 = "GGGrrrrrrr"
            self.inter0203 = "yyyrrrrrrr"
            self.stage03 = "rrrGGGGrrr"
            self.inter0301 = "rrryyyyrrr"
            self.Stages = [self.stage01, self.stage02, self.stage03]
            self.sumoConfig = "simulation/SimpleT/simpleT.sumocfg"
            self.routeScript = "simulation/SimpleT/routeGenerator.py"
        elif traffic_situation == 'simpleX':
            # Signal-state strings for SimpleX.  The inter* strings are
            # defined but are not part of the selectable Stages list.
            self.stage01 = "GGGGggrrrrrrGGGGggrrrrrr"
            self.inter0102 = "yyyyggrrrrrryyyyggrrrrrr"
            self.stage02 = "rrrrGGrrrrrrrrrrGGrrrrrr"
            self.inter0203 = "rrrryyrrrrrrrrrryyrrrrrr"
            self.stage03 = "rrrrrrGGGGggrrrrrrGGGGgg"
            self.inter0304 = "rrrrrryyyyggrrrrrryyyygg"
            self.stage04 = "rrrrrrrrrrGGrrrrrrrrrrGG"
            self.inter0401 = "rrrrrrrrrryyrrrrrrrrrryy"
            self.Stages = [self.stage01, self.stage02, self.stage03,
                           self.stage04]
            self.sumoConfig = "simulation/SimpleX/simpleX.sumocfg"
            self.routeScript = "simulation/SimpleX/routeGenerator.py"
        else:
            raise ValueError(
                "Unknown traffic situation %r; expected 'simpleT' or "
                "'simpleX'" % (traffic_situation,))

        self.sumoBinary = "sumo"
        self.vehicleDict = {}
        self.currentVehList = []
        self.previousVehList = []
        self.totalCumWaitingTime = 0
        self.traciPORT = 8813
        # True only while a TraCI connection opened by env_start is live.
        self._traciConnected = False

    def env_init(self):
        """Return the RL-Glue task specification string.

        The upper bound of the action range is ``len(self.Stages) - 1``.
        """
        # NOTE(review): the declared REWARDS range (-1.0 1.0) is not enforced
        # by env_step, whose reward is an unbounded negative sum.
        task_spec = (
            "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1.0 "
            "OBSERVATIONS INTS (0 1) ACTIONS INTS (0 {}) REWARDS (-1.0 1.0) "
            "EXTRA rl_glue_sumo_environment(Python) by Tobias Rijken."
        )
        return task_spec.format(len(self.Stages) - 1)

    def env_start(self):
        """Start a new episode and return the first observation.

        Runs the route generator script, launches SUMO, connects through
        TraCI, and resets the per-episode bookkeeping.

        Returns:
            Observation whose ``intArray`` is ``laneState.sum(axis=1)``.
        """
        # Randomly generate new routes.  Wait for the script to finish so
        # that SUMO is not started while routes are still being generated
        # (presumably the script writes the route file the config refers to).
        routeGenProcess = subprocess.Popen(
            ["python", self.routeScript], stdout=sys.stdout)
        routeGenProcess.wait()

        # Start SUMO.  Arguments are passed as a list (no shell) so paths are
        # not re-parsed; the handle is kept so the child is not
        # garbage-collected while it is still running.
        self._sumoProcess = subprocess.Popen(
            [self.sumoBinary, "-c", self.sumoConfig, "--no-warnings"],
            stdout=sys.stdout)
        traci.init(self.traciPORT)
        self._traciConnected = True

        self.state = State("1")

        # Reset these variables when an episode starts.
        self.vehicleDict = {}
        self.currentVehList = []
        self.previousVehList = []
        self.totalCumWaitingTime = 0
        self.simStep = 1

        returnObs = Observation()
        # Alternative observation: self.state.laneState.flatten()
        returnObs.intArray = self.state.laneState.sum(axis=1)
        return returnObs

    def env_step(self, thisAction):
        """Apply the chosen stage, advance SUMO one step, report the result.

        Args:
            thisAction: RL-Glue Action; ``intArray[0]`` is the index into
                ``self.Stages``.

        Returns:
            Reward_observation_terminal with the speed-difference reward, the
            new observation and the terminal flag (1 when
            ``traci.simulation.getMinExpectedNumber()`` is 0, else 0).
        """
        # Process action.
        stageIndex = thisAction.intArray[0]
        traci.trafficlights.setRedYellowGreenState(
            "1", self.Stages[stageIndex])
        traci.simulationStep()
        self.simStep += 1

        self.currentVehList = traci.vehicle.getIDList()
        self.state.updateState(self.currentVehList)

        # Check if state is terminal.
        episodeTerminal = 0
        if traci.simulation.getMinExpectedNumber() == 0:
            episodeTerminal = 1

        theObs = Observation()
        # Alternative observation: self.state.laneState.flatten()
        theObs.intArray = self.state.laneState.sum(axis=1)

        returnRO = Reward_observation_terminal()
        # Alternative reward: self.calculate_delay()
        returnRO.r = self.calculate_speed_diff()
        returnRO.o = theObs
        returnRO.terminal = episodeTerminal

        killedVehicles = checkVehKill(self.vehicleDict)
        for vehicle in killedVehicles:
            del self.vehicleDict[vehicle]

        self.previousVehList = self.currentVehList

        # Close the connection only after every TraCI-dependent computation
        # for this step (reward, vehicle bookkeeping) has been done.
        if episodeTerminal:
            self._close_traci()
        return returnRO

    def env_cleanup(self):
        """Release the TraCI connection if one is still open."""
        self._close_traci()

    def env_message(self, in_message):
        """Handle a message sent by the experiment.

        Used to shut down the SUMO connection between runs; otherwise
        multiple SUMO sessions would be listening on the same port.  This is
        needed because agent_end is not called when we run out of steps.

        Args:
            in_message: Message string.  "episode_end", "finish_epoch" and
                "finish_testing" close the TraCI connection;
                "start_testing" is accepted and ignored.

        Returns:
            None for recognised messages, otherwise an explanatory string.
        """
        closing_prefixes = ("episode_end", "finish_epoch", "finish_testing")
        if in_message.startswith(closing_prefixes):
            self._close_traci()
        elif in_message.startswith("start_testing"):
            pass
        else:
            return "I don't know how to respond to your message"

    def _close_traci(self):
        """Close the TraCI connection at most once per env_start.

        A terminal step and a following "episode_end" message both request a
        close; the flag turns the second request into a no-op.
        """
        if getattr(self, '_traciConnected', False):
            self._traciConnected = False
            traci.close()

    def calculate_delay(self):
        """Return the negated waiting-time increment summed over vehicles.

        Vehicles reported as new by ``checkVehBirth`` get a ``VehicleTimer``;
        every tracked timer is then incremented and the increments are
        summed.  The negated sum is also accumulated into
        ``self.totalCumWaitingTime``.
        """
        birthList = checkVehBirth(self.currentVehList, self.previousVehList)
        for veh in birthList:
            self.vehicleDict[veh] = VehicleTimer(veh)

        totalWaitingTime = sum(
            timer.incrementWaitingTime()
            for timer in self.vehicleDict.values())

        # Return negative reward.
        self.totalCumWaitingTime -= totalWaitingTime
        return -totalWaitingTime

    def calculate_speed_diff(self):
        """
        Returns the negated cumulative speed difference between the allowed
        speed and the car's speed for every car in ``self.currentVehList``.
        """
        cumulative_speed_diff = sum(
            traci.vehicle.getAllowedSpeed(car) - traci.vehicle.getSpeed(car)
            for car in self.currentVehList)
        return -cumulative_speed_diff
def main():
    """Load a SumoEnvironment for the scenario named on the command line.

    The first command-line argument is passed to ``SumoEnvironment`` as the
    traffic situation (e.g. 'simpleT' or 'simpleX'), and the resulting
    environment is handed to ``EnvironmentLoader.loadEnvironment``.

    Raises:
        SystemExit: With a usage message when the argument is missing.
    """
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit("usage: %s <simpleT|simpleX>" % sys.argv[0])
    EnvironmentLoader.loadEnvironment(SumoEnvironment(sys.argv[1]))
# Start the environment loader only when this file is run as a script.
if __name__ == "__main__":
    main()