-
Notifications
You must be signed in to change notification settings - Fork 0
/
GridEnvSim.py
322 lines (277 loc) · 10.6 KB
/
GridEnvSim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
import random
import sys,pygame
import gridDef
#import IPython
# Cell-type codes taken from gridDef; Grid stores them as values in its
# world dict and compares cells against them.
monsterType = gridDef.monsterType
coinType = gridDef.coinType
marioType = gridDef.marioType
# XType / YType are not referenced in the visible part of this file.
# NOTE(review): confirm whether other modules rely on these aliases.
XType = gridDef.XType
YType = gridDef.YType
# Blocks reuse the monster code: a "block" cell is stored as monsterType.
blockType = monsterType
class Grid:
    """Grid world in which Mario moves between coins and blocks.

    The world is a dict mapping ``(x, y)`` cells to a cell-type code, with
    ``0 <= x < width`` and ``0 <= y < height``.  An empty tile is ``0``; the
    other codes are the module-level ``marioType``, ``coinType`` and
    ``blockType`` values.
    """

    def __init__(self, size, imgSize, actionList, monsterMoveProb):
        """Store the configuration; the world itself is built by start().

        size            -- (width, height) of the grid, in cells
        imgSize         -- (width, height) of the rendered surface, in pixels
        actionList      -- sequence of (dx, dy) moves Mario may take
        monsterMoveProb -- stored for monster movement, which is currently
                           disabled (blocks never move)
        """
        self.size = self.width, self.height = size
        self.imgSize = imgSize
        self.actionList = actionList
        self.monsterMoveProb = monsterMoveProb
        # Original notes on the cell codes:
        # Mario is 1, turtle is 2, coin is 3, goal is 4, empty tile is 0
        self.world = {}
        # Sprites and the drawing surface are created on the first start().
        self.mario = self.coin = self.turtle = self.block = None
        self.screen = None

    def _cells(self):
        """Yield every (x, y) cell row by row (y outer loop, x inner loop)."""
        for y in range(self.height):
            for x in range(self.width):
                yield (x, y)

    def _marioLoc(self):
        """Return Mario's cell; raise AssertionError if he is not on the grid."""
        found = self.find(marioType)
        if not found:
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError("Mario is missing from the grid")
        return found[0]

    def _loadAssets(self):
        """Load the sprite bitmaps and create the off-screen drawing surface."""
        self.mario = pygame.image.load("mario.bmp")
        self.coin = pygame.image.load("coin.bmp")
        self.turtle = pygame.image.load("turtle.bmp")
        self.block = pygame.image.load("block.bmp")
        # Assigned last: start() uses it as the "assets are loaded" marker, so
        # a failed bitmap load is retried on the next start().
        self.screen = pygame.Surface(self.imgSize)

    def start(self, numOfTurtle, numOfCoin, subgoal):
        """Reset the world for a new episode and return it.

        Mario is placed in the middle of the grid, then ``numOfCoin`` coins
        and ``numOfTurtle`` blocks are dropped on distinct free cells.
        ``subgoal`` is a (dx, dy) offset from Mario's start cell; the
        resulting cell becomes ``self.objective``.

        Raises RuntimeError (from getNewLoc) if there are more objects than
        free cells.
        """
        self.stepNum = 0
        for cell in self._cells():
            self.world[cell] = 0
        # put mario in the middle of the world (floor division keeps the
        # coordinates integral on Python 3 as well)
        marioLoc = (self.width // 2, self.height // 2)
        self.world[marioLoc] = marioType
        locList = [marioLoc]
        # add coin (destination)
        for _ in range(numOfCoin):
            self.world[self.getNewLoc(locList)] = coinType
        # add blocks (stored with the monster code)
        for _ in range(numOfTurtle):
            self.world[self.getNewLoc(locList)] = blockType
        self.objective = (subgoal[0] + marioLoc[0], subgoal[1] + marioLoc[1])
        # Load the bitmaps once instead of re-reading them every episode.
        if self.screen is None:
            self._loadAssets()
        return self.world

    def step(self, action, isTraining):
        """Apply ``action`` and return ``(reward, world, episodeEnded)``."""
        self.stepNum = self.stepNum + 1
        reward = self.updateState(action)
        flag = self.isTerminal(reward, isTraining)
        return reward, self.world, flag

    def find(self, type):
        """Return the list of cells whose value equals ``type``, row by row."""
        return [cell for cell in self._cells() if self.world[cell] == type]

    def getNewLoc(self, locList):
        """Pick a random cell not in ``locList``, append it and return it.

        Raises RuntimeError when every cell is already taken (the previous
        rejection-sampling loop never returned in that case).
        """
        taken = set(locList)
        free = [cell for cell in self._cells() if cell not in taken]
        if not free:
            raise RuntimeError(
                "no free cell left on the %dx%d grid" % (self.width, self.height)
            )
        loc = random.choice(free)
        locList.append(loc)
        return loc

    def dump(self):
        """Write the grid to stdout, one row per line.

        Each cell value is followed by three spaces, which is the spacing the
        former Python 2 ``print value, " ",`` statements produced.
        """
        for y in range(self.height):
            row = "".join(
                "%s   " % (self.world[(x, y)],) for x in range(self.width)
            )
            sys.stdout.write(row + "\n")

    def count(self, type):
        """Return how many cells hold the value ``type``."""
        return sum(1 for cell in self._cells() if self.world[cell] == type)

    def isTerminal(self, reward, isTraining):
        """Return True when the episode is over.

        The episode ends when (outside training) no coin is left, when Mario
        stands on the objective, when more than two steps were taken, or when
        ``reward`` is -30.

        Raises AssertionError if Mario is not on the grid.
        """
        marioLoc = self._marioLoc()
        if not isTraining and self.count(coinType) == 0:
            # no coins available
            return True
        if marioLoc[0] == self.objective[0] and marioLoc[1] == self.objective[1]:
            # goal reached
            return True
        if self.stepNum > 2:
            # mario needs to achieve its goal in a very short time
            return True
        # -30 was the turtle-collision reward; updateState no longer produces
        # it while that branch is disabled, but the check is kept.
        return reward == -30

    def updateState(self, action):
        """Move Mario by ``action`` (a (dx, dy) pair) and return the reward.

        With probability 0.1 the requested action is replaced by a random one
        from ``actionList``.  Rewards: -0.1 per ordinary move, 0 for trying
        to leave the grid (Mario stays put), 20 for landing on a coin, -25
        for bumping into a block (Mario stays put), plus 10 when Mario ends
        on the objective.

        Raises AssertionError if Mario is not on the grid.
        """
        reward = -0.1
        marioOldLoc = self._marioLoc()
        self.world[marioOldLoc] = 0

        # move Mario
        if random.random() < 0.1:
            # select randomly
            action = self.actionList[int(random.random() * len(self.actionList))]
        marioNewLoc = (marioOldLoc[0] + action[0], marioOldLoc[1] + action[1])

        # check Mario stays in the boundary
        if marioNewLoc not in self.world:
            reward = 0  # don't punish it
            marioNewLoc = marioOldLoc

        target = self.world[marioNewLoc]
        if target == coinType:
            # Mario eats the coin (the cell is overwritten below)
            reward = 20
        elif target == blockType:
            # meet block, stay put
            marioNewLoc = marioOldLoc
            reward = -25

        # add a small reward to reward the agent who reaches the subgoal
        if marioNewLoc[0] == self.objective[0] and marioNewLoc[1] == self.objective[1]:
            reward = reward + 10

        # Use the shared constant rather than a literal 1 so find(marioType)
        # always locates Mario again.
        self.world[marioNewLoc] = marioType
        return reward

    def getScreen(self):
        """Render the current world onto the internal surface and return it.

        start() must have been called first so the sprites are loaded.
        """
        white = 255, 255, 255
        black = 0, 0, 0
        self.screen.fill(black)

        # draw grid
        pygame.draw.rect(
            self.screen, white,
            pygame.Rect(0, 0, self.imgSize[0], self.imgSize[1]), 2
        )
        xLine = self.width * 4
        yLine = self.height * 4
        incX = self.imgSize[0] // xLine * 4
        # The vertical spacing comes from the image height (it used to be
        # derived from the width, which only worked for square images).
        incY = self.imgSize[1] // yLine * 4
        for x in range(0, self.imgSize[0], incX):
            pygame.draw.line(self.screen, white, (x, 0), (x, self.imgSize[1]), 2)
        for y in range(0, self.imgSize[1], incY):
            pygame.draw.line(self.screen, white, (0, y), (self.imgSize[0], y), 2)

        # draw objects, each sprite centred in its cell
        offsetX = incX // 2
        offsetY = incY // 2
        sprites = (
            (marioType, self.mario),
            (coinType, self.coin),
            (blockType, self.block),
        )
        for x, y in self._cells():
            cellValue = self.world[(x, y)]
            for kind, image in sprites:
                if cellValue == kind:
                    rect = image.get_rect()
                    rect.center = (offsetX + x * incX, offsetY + y * incY)
                    self.screen.blit(image, rect)
        return self.screen
def Save(agent, filename):
    """Pickle ``agent`` into the file ``filename`` (overwriting it).

    The file is opened in binary mode and is closed even if pickling fails.
    """
    import pickle
    with open(filename, 'wb') as output:
        pickle.dump(agent, output)
def Load(filename):
    """Return the object unpickled from the file ``filename``.

    The file is closed before returning (it used to be left open).
    """
    import pickle
    # NOTE: unpickling can execute arbitrary code -- only load files that
    # were written by Save() from a trusted source.
    with open(filename, 'rb') as source:
        return pickle.load(source)
import RelationalQ
import tool
# Training script: runs short sub-goal episodes in the grid world, feeds the
# observations to a RelationalQ controller and pickles the controller at the end.
if __name__ == "__main__":
    # --- experiment configuration ---
    discrete_size = 8                 # the grid is discrete_size x discrete_size cells
    objSet = (10, 1)                  # (number of turtles/blocks, number of coins)
    monsterMoveProb = 0.3
    isEpisodeEnd = False
    maxStep = 5000                    # total number of environment steps to run
    frameRate = 5000                  # upper bound passed to clock.tick()
    isShow = False                    # draw every step to the window when True
    size = 800, 800                   # window / render size in pixels
    gridSize = (discrete_size, discrete_size)
    delay = 100
    interval = 50

    # --- pygame setup ---
    pygame.init()
    pygame.key.set_repeat(delay, interval)
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode(size)

    # --- agent and environment ---
    actionList = ((0, 1), (0, -1), (1, 0), (-1, 0))
    # NOTE(review): the three numeric arguments are presumably learning
    # hyper-parameters -- confirm their meaning in RelationalQ.
    controller = RelationalQ.RelationalQ(0.1, 0.1, 0.9, actionList)
    env = Grid((discrete_size, discrete_size), size, actionList, monsterMoveProb)

    numOfTurtle = objSet[0]
    numOfCoin = objSet[1]

    # Single-argument print(...) calls behave the same on Python 2 and 3; the
    # double space matches what the former ``print "label ", value`` emitted.
    print("# coin  %s" % (numOfCoin,))
    print("# Turtle  %s" % (numOfTurtle,))
    print("isEpisodeEnd  %s" % (isEpisodeEnd,))

    isTraining = not isEpisodeEnd

    count = 0                         # number of episodes started
    totalReward = 0
    rewardList = []                   # (first step, last step, episode reward) per episode
    stepCount = 0
    while stepCount < maxStep:
        # randomly choose a sub goal at the beginning of the episode
        goalDiff = random.choice(actionList)
        world = env.start(numOfTurtle, numOfCoin, goalDiff)

        objLoc = tool.getObjLoc(world, gridSize)
        marioLoc = tool.getMarioLoc(world, gridSize)
        goal = (marioLoc[0] + goalDiff[0], marioLoc[1] + goalDiff[1])
        objLocWithGoal = tool.addGoalLoc(objLoc, goal)
        ob = (marioLoc, objLocWithGoal)
        action = controller.start(ob)

        count += 1
        prevStepCount = stepCount
        episodeReward = 0
        while stepCount < maxStep:
            stepCount = stepCount + 1
            clock.tick(frameRate)

            reward, world, flag = env.step(action, isTraining)
            totalReward = totalReward + reward
            episodeReward = episodeReward + reward
            if flag:
                # episode finished: give the controller the final reward
                controller.end(reward)
                break

            # The goal stays fixed for the whole episode; only Mario and the
            # objects are re-read from the world.
            objLoc = tool.getObjLoc(world, gridSize)
            marioLoc = tool.getMarioLoc(world, gridSize)
            objLocWithGoal = tool.addGoalLoc(objLoc, goal)
            ob = (marioLoc, objLocWithGoal)
            action = controller.step(reward, ob)

            for event in pygame.event.get():
                # closing the window aborts the run immediately (nothing is saved)
                if event.type == pygame.QUIT:
                    sys.exit()

            if isShow:
                screen.blit(env.getScreen(), (0, 0))
                pygame.display.flip()

        rewardList.append((prevStepCount, stepCount, episodeReward))

    # --- report and persist the learned controller ---
    print(totalReward)
    for conf in controller.agent:
        print(controller.agent[conf].Q)
    controller.dumpObj()
    controller.dumpCoinAndGoal()
    controller.dumpBlockAndGoal()
    Save(controller, 'smart.db')