/
cage.py
executable file
·580 lines (500 loc) · 22.9 KB
/
cage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
import pygame
from pygame.locals import *
def to_pygame_color(color_in):
    """Convert an (r, g, b) colour with components in 0..1 to a pygame.Color.

    Only the first three items of ``color_in`` are read.  Each component is
    clamped to 0..1, scaled by 255 and truncated to an int.  If any of the
    three components is NaN, the dark grey ``pygame.Color(40, 40, 40)`` is
    returned instead.
    """
    import math  # function-scope import keeps this helper self-contained

    channels = (color_in[0], color_in[1], color_in[2])
    # NaN never compares equal to anything (not even to itself), so the test
    # has to go through isnan(); an "== nan" comparison is always False.
    if any(math.isnan(channel) for channel in channels):
        return pygame.Color(40, 40, 40)
    # Clamp before scaling so out-of-range or infinite components cannot
    # produce a value pygame.Color rejects.
    red, green, blue = [int(min(max(channel, 0.0), 1.0) * 255) for channel in channels]
    return pygame.Color(red, green, blue)
import pymunk
# Last collision group id handed out; 0 is never returned by the function below.
collision_shape_counter = 0


def get_new_collision_group_counter():
    """Return the next collision group id and remember it module-wide.

    Every call yields the previous id plus one.  Should that sum ever be 0
    it is bumped once more, so the returned id is never 0.
    """
    global collision_shape_counter
    next_id = collision_shape_counter + 1
    if next_id == 0:
        next_id += 1
    collision_shape_counter = next_id
    return next_id
import numpy
__author__ = 'AH'
from pybrain.utilities import abstractMethod
from pybrain.rl.environments.twoplayergames.twoplayergame import CompetitiveEnvironment
class Thing(object):  # TODO: make derive from ParameterContainer?
    """Base class for every object that lives in an environment.

    Constructing a Thing registers it with ``env`` via ``env.embedThing(self)``.
    ``color``, ``position``, ``updateState``, ``draw`` and ``removeFromEnv``
    are placeholders that call pybrain's ``abstractMethod()`` and are meant
    to be overridden; ``embedInEnv`` is an optional hook that does nothing
    by default.
    """

    def __init__(self, env):
        env.embedThing(self)

    def embedInEnv(self, env):
        """Hook called when the thing is added to ``env``; no-op by default."""
        return None

    def removeFromEnv(self, env):
        """Hook called when the thing is removed from ``env``."""
        abstractMethod()

    def updateState(self, env, dt):
        """Advance the thing's own state; ``dt`` is given in seconds."""
        abstractMethod()

    def draw(self, env):
        """Render the thing using ``env``."""
        abstractMethod()

    @property
    def color(self):
        """Tuple of (r, g, b)."""
        abstractMethod()

    @property
    def position(self):
        """Position of the thing."""
        abstractMethod()
class StaticLines(Thing):
    """Chain of line segments attached to one infinitely heavy body.

    ``line_points`` is a sequence of 2d points (tuples); each consecutive
    pair of points becomes one ``pymunk.Segment`` of the given ``radius``.
    Every segment gets ``owner`` set to this object and shares the given
    ``friction`` and ``elasticity``.  ``color`` is an (r, g, b) tuple.
    """

    def __init__(self, env, line_points, radius, color=(1., 1., 1.), friction=0.1, elasticity=0.25):
        self.color_in = color
        # Infinite mass and moment: the body itself is never added to the
        # space, only the segments attached to it are.
        self.body = pymunk.Body(pymunk.inf, pymunk.inf)
        points = list(line_points)
        self.lines = []
        # Pair every point with its successor to expand the polyline into
        # individual segments.
        for start, end in zip(points, points[1:]):
            segment = pymunk.Segment(self.body, start, end, radius)
            segment.owner = self
            segment.friction = friction
            segment.elasticity = elasticity
            self.lines.append(segment)
        # Register last, once all segments exist (embedInEnv reads them).
        Thing.__init__(self, env)

    def removeFromEnv(self, env):
        """Take the segments out of the space again.

        Mirrors embedInEnv: the segments were registered with ``space.add``
        so they are taken out with ``space.remove``.
        """
        env.space.remove(self.lines)

    def embedInEnv(self, env):
        """Add all segments to the physics space."""
        env.space.add(self.lines)

    def updateState(self, env, dt):
        """Static geometry has no state to update."""
        return None

    def draw(self, env):
        """Draw every segment as a thick line with round end caps."""
        colour = to_pygame_color(self.color)  # identical for all segments
        for line in self.lines:
            body = line.body
            p1 = env.transformPoint2Screen(body.position + line.a.rotated(body.angle))
            p2 = env.transformPoint2Screen(body.position + line.b.rotated(body.angle))
            # Line width is the segment diameter on screen, at least 1 pixel.
            thick = int(max(env.transformDimension2Screen(line.radius) * 2, 1.0))
            p1 = (int(p1[0]), int(p1[1]))
            p2 = (int(p2[0]), int(p2[1]))
            pygame.draw.line(env.surface, colour, p1, p2, thick)
            # Floor division keeps the cap radius an int under true division.
            cap_radius = thick // 2
            pygame.draw.circle(env.surface, colour, p1, cap_radius)
            pygame.draw.circle(env.surface, colour, p2, cap_radius)

    @property
    def color(self):
        """The (r, g, b) tuple passed to the constructor."""
        return self.color_in

    @property
    def position(self):
        """Position of the body shared by all segments.

        Read from ``self.body`` directly so it also works when fewer than
        two points were given and no segment exists.
        """
        return self.body.position
################################################
class DynamicThing(Thing):
    """Thing backed by one physics body and one collision shape.

    Subclasses implement ``_initPhysics`` to create ``self.body`` and
    ``self.shape``; any extra positional constructor arguments are passed
    on to it unchanged.
    """

    body = None
    shape = None

    def __init__(self, env, position=(0., 0.), *args):
        # Physics must be fully set up before Thing.__init__ registers this
        # object with the env (which calls embedInEnv).
        self._initPhysics(*args)
        self.body.position = position
        # Default surface properties
        shape = self.shape
        shape.friction = 0.1
        shape.elasticity = 0.5
        Thing.__init__(self, env)

    def _initPhysics(self, *args):
        """Create ``self.body`` and ``self.shape``; subclasses must override."""
        abstractMethod()

    def embedInEnv(self, env):
        """Add body and shape to the space, then tag the shape with its owner."""
        env.space.add(self.body, self.shape)
        self.shape.owner = self

    def removeFromEnv(self, env):
        """Remove body and shape from the space (the owner tag is left in place)."""
        env.space.remove(self.body, self.shape)

    @property
    def position(self):
        """Current position of the physics body."""
        return self.body.position
#### Ball with reasonable defaults
class Ball(DynamicThing):
    """Circular dynamic body drawn as a filled disc with an optional outline.

    ``color`` and ``outline`` are (r, g, b) tuples; pass ``outline=None`` to
    draw no outline.  ``density`` is multiplied by the disc area to obtain
    the body mass.  ``look_at`` is the direction of the orientation line and
    starts out as Vec2d(1, 0).
    """

    def __init__(self, env, position=(0., 0.), radius=0.25, density=30, color=(1., 1., 1.), outline=(0.5, 0.5, 0.5)):
        # All attributes read by _initPhysics/draw are set before the base
        # constructor builds the physics objects and registers with the env.
        self.color_in = color
        self.outline = outline
        self.radius = radius
        self.look_at = pymunk.Vec2d(1, 0)
        DynamicThing.__init__(self, env, position, density)

    def _initPhysics(self, *args):
        """Build the circle body and shape; ``args[0]`` is the density."""
        mass = self.radius**2*3.1416 * args[0]
        inertia = pymunk.moment_for_circle(mass, 0, self.radius)
        self.body = pymunk.Body(mass, inertia)
        self.shape = pymunk.Circle(self.body, self.radius)

    def updateState(self, env, dt):
        """A plain ball has no state of its own to update."""
        return None

    def setLookAt(self, vec):
        """Set the direction of the orientation line."""
        self.look_at = vec

    def draw(self, env):
        """Draw the disc and, if large enough on screen, outline and orientation line."""
        radius = int(env.transformDimension2Screen(self.radius))
        pos = env.transformPoint2Screen(self.body.position)
        pos = (int(pos[0]), int(pos[1]))
        pygame.draw.circle(env.surface, to_pygame_color(self.color), pos, radius)
        # The outline is skipped when disabled or when the disc is tiny.
        if self.outline is None or radius < 4:
            return
        outline_colour = to_pygame_color(self.outline)  # used for ring and line
        # Outline thickness grows with the radius, limited to 1..3 pixels.
        thick = int(max(min(radius // 12, 3), 1))
        if radius > 10:
            radius += 1
        pygame.draw.circle(env.surface, outline_colour, pos, radius, thick)
        # Orientation line from the centre to the rim along look_at,
        # rotated by the body's angle.
        tip = env.transformPoint2Screen((self.look_at*self.radius).rotated(self.body.angle)+self.body.position)
        tip = (int(tip[0]), int(tip[1]))
        pygame.draw.line(env.surface, outline_colour, pos, tip, thick)

    @property
    def color(self):
        """The fill colour passed to the constructor."""
        return self.color_in
class Creature(Thing):
    """Thing that carries an ``energy`` value.

    ``position`` is accepted but not used by this class itself.
    """

    def __init__(self, env, position=(0., 0.), energy=1.0):
        # Set state first so the object is complete by the time
        # Thing.__init__ registers it with the env.
        self.energy = energy
        Thing.__init__(self, env)
class Food(Ball):
    """Ball of radius 0.2 and density 20 coloured (1.0, 1.0, 0), default outline."""

    def __init__(self, env, position=(0., 0.)):
        Ball.__init__(self, env, position, radius=0.2, density=20, color=(1.0, 1.0, 0))
#SensorRay querying information on a contained linespace about the environment
class SensorRay(object):
    """Line-segment sensor that reports colour and distance of what it hits.

    ``results`` is a 4-element float array: items 0..2 hold a smoothed
    (r, g, b) colour of what the ray hit, item 3 holds the hit distance as
    returned by the segment query (1.0 when nothing is hit).
    """

    def __init__(self, inner_radius, radius, angle):
        self.setInitParams(inner_radius, radius, angle)

    def setInitParams(self, inner_radius, radius, angle, fadeout=0.60):
        """Store the ray geometry and reset the buffered results.

        inner_radius -- the ray starts this far from the origin; the space
                        closer than that is ignored
        radius       -- distance from the origin to the end of the ray
        angle        -- rotation applied to the orientation vector
        fadeout      -- blend factor used when updating the colour
        """
        self.inner_radius = inner_radius
        self.radius = radius
        self.angle = angle
        self.fadeout = fadeout
        self._zero()

    def _zero(self):
        """Forget the last hit: colour (0, 0, 0), distance 1.0."""
        self.owner_hit = None
        self.normal_hit = None
        self.results = numpy.zeros(4)
        self.results[3] = 1.

    def updateSegment(self, pos, orientation, flipy):
        """Recompute ``start`` and ``end`` of the ray around ``pos``.

        The ray points along ``orientation`` rotated by ``self.angle``; when
        ``flipy`` is true the y component of that direction is negated.
        """
        direction = orientation.rotated(self.angle)  # 0 degrees means pointing to the right
        if flipy:
            direction.y *= -1.
        self.start = direction * self.inner_radius + pos
        self.end = direction * self.radius + pos

    def processSegment(self, env, ignore_group):
        """Query the space along the ray and update the buffered results.

        No hit: the owner and normal are cleared, the distance is set to 1.0
        and the colour is multiplied by ``fadeout`` so that it decays over
        several calls.  Hit on a shape without a known owner: nothing
        changes.  Otherwise the colour moves towards the owner's colour by
        the factor ``fadeout`` and the distance is taken from the query.
        """
        sq = env.space.segment_query_first(self.start, self.end, group=ignore_group)
        if sq is None:
            self.owner_hit = None
            self.normal_hit = None
            self.results[3] = 1.
            self.results[0:3] *= self.fadeout  # fade out to give it some kind of memory
            return
        # Shapes that were never tagged have no "owner" attribute at all;
        # treat them like an explicit None instead of raising.
        owner = getattr(sq.shape, "owner", None)
        if owner is None:
            return  # we don't know that thing
        self.normal_hit = sq.n
        self.owner_hit = owner
        self.results[:-1] += (owner.color - self.results[:-1]) * self.fadeout
        self.results[3] = sq.t

    def draw(self, env):
        """Draw a dot at the ray end and, on a hit, a line up to the hit point."""
        p1 = env.transformPoint2Screen(self.start)
        p2 = env.transformPoint2Screen(self.end)
        c = to_pygame_color(self.color_hit)
        pygame.draw.circle(env.surface, c, (int(p2[0]), int(p2[1])), 1)  # dot at outermost sensor point
        if self.owner_hit is not None:
            p3 = self.start.interpolate_to(self.end, self.distance)
            p3 = env.transformPoint2Screen(p3)
            pygame.draw.aaline(env.surface, c, p1, p3)

    @property
    def color_hit(self):
        """The buffered colour as a 3-tuple."""
        return tuple(self.results[0:3])

    @property
    def distance(self):
        """The buffered distance value (``results[3]``)."""
        return self.results[3]
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure import * #RecurrentNetwork, LinearLayer, SigmoidLayer, FullConnection
from pybrain.rl.environments import Task, EpisodicTask
class Living(Creature):
    """Creature with a brain plus input and output buffers.

    ``inbuf`` (length ``indim``) holds the inputs offered to the brain and
    ``outbuf`` (length ``outdim``) the outputs to act upon.  Both start out
    as all zeros.
    """

    def __init__(self, indim, outdim, env, position=(0., 0.), energy=1.0):
        Creature.__init__(self, env, position, energy)
        self.indim = indim
        self.outdim = outdim
        self.inbuf = numpy.zeros(self.indim)
        # Zero-filled rather than numpy.empty(): the output buffer may be
        # read before anything has written to it, and uninitialised memory
        # can contain arbitrary values, including NaN.
        self.outbuf = numpy.zeros(self.outdim)
        self._createBrain()

    def _createBrain(self):
        """Build the default brain; override for individual brains.

        The network built by ``buildNetwork`` gets one hidden layer sized at
        30% of ``indim`` (truncated to an int), with bias but no output bias
        and no recurrence.
        """
        hidden = int(self.indim*0.3)
        self.brain = buildNetwork(self.indim, hidden, self.outdim, bias=True, outputbias=False, recurrent=False)
class TaskLiving(EpisodicTask):
    """Episodic task that feeds actions to, and reads observations from, a Living.

    The dimensions are mirrored: the task's ``indim`` is the living's
    ``outdim`` (actions go into ``outbuf``) and the task's ``outdim`` is the
    living's ``indim`` (observations come from ``inbuf``).  An episode is
    finished after ``max_samples`` performed actions.
    """

    def __init__(self, env, living, start_point=(0, 0), max_samples=5000):
        self.start_point = start_point
        self.living = living
        Task.__init__(self, env)
        self.max_samples = max_samples

    @property
    def indim(self):
        """Size of an action vector (the living's ``outdim``)."""
        return self.living.outdim

    @property
    def outdim(self):
        """Size of an observation vector (the living's ``indim``)."""
        return self.living.indim

    def reset(self):
        """Zero the cumulative reward and the sample counter.

        The living is not moved back to ``start_point``.
        """
        self.cumreward = 0
        self.samples = 0

    def performAction(self, action):
        """ Execute one action. """
        # Task.performAction is deliberately not called here; the original
        # note says it must be done asynchronously.
        living = self.living
        living.outbuf = action
        self.addReward()
        self.samples += 1

    def getReward(self):
        """ Compute and return the current reward (i.e. corresponding to the last action performed) """
        return abstractMethod()

    def getObservation(self):
        """ Return the living's input buffer as the observation. """
        return self.living.inbuf

    def isFinished(self):
        """True once ``samples`` has reached ``max_samples``."""
        return bool(self.samples >= self.max_samples)
class Eater(Ball, Living):
    """Ball-shaped creature steered by six outputs and sensing through rays.

    outbuf layout:
      0: accel left, right (-1. to 1.)
      1: jump (if >= 0.5) creature tries a jump (must be on solid ground)
      2: reserved (speech?!)
      3: red color
      4: green color
      5: blue color (see the ``color`` property for the mapping to 0..1)

    inbuf layout:
      0: energy
      1: horizontal velocity of the body
      2 + 4*i .. 2 + 4*i + 2: (r, g, b) seen by sensor i
      2 + 4*i + 3: reserved for sensor i's distance, not written here

    Sensor 0 points down, sensor 1 up, the remaining ``sensors_front`` rays
    fan out in front of the creature.
    """

    decay = 0.00          # energy lost per second
    max_force = 200.      # force applied at full acceleration
    jump_impulse = 140.   # upward impulse of a jump
    sensors_front = 3     # number of forward-looking rays
    sensor_radius = 10.   # outer radius of all rays
    did_jump = False      # True if the last updateState performed a jump

    def __init__(self, env, position=(0., 0.), energy=1.0, fixed_color=None):
        # A fixed colour can be set manually (reducing agents' variety).
        self.fixed_color = fixed_color
        Ball.__init__(self, env, position, 0.25, 50, (1., 1., 1.), (0.5, 0.5, 0.5))
        self._setSensorRays(0, 3.1416/3, self.sensors_front)
        self.jump_time = 0.
        # 2 scalar inputs plus 4 values for each of the (2 + front) sensors.
        Living.__init__(self, 2+(2+self.sensors_front)*4, 6, env, position, energy)

    def _setSensorRays(self, start_angle, end_angle, nr):
        """Create the down ray, the up ray and ``nr`` rays from ``start_angle``.

        The fan is spaced by (end_angle - start_angle) / nr, so the last ray
        stops one step short of ``end_angle``.
        """
        inner = self.radius*0.5
        self.sensors = []
        self.sensors.append(SensorRay(inner, self.sensor_radius, -3.1416/2))  # first is always down
        self.sensors.append(SensorRay(inner, self.sensor_radius, 3.1416/2))   # second is always up
        for i in range(nr):
            self.sensors.append(SensorRay(inner, self.sensor_radius, start_angle + (end_angle-start_angle)/nr*i))

    def _createBrain(self):
        """Build the recurrent network used as brain.

        Wiring: in -> hidden1 -> Memory (LSTM) -> hidden0 -> out, with the
        bias unit feeding Memory and one recurrent connection from hidden0
        back into Memory.
        """
        n = RecurrentNetwork()
        n.addInputModule(LinearLayer(self.indim, name='in'))
        n.addModule(TanhLayer(8*4, name='hidden0'))
        n.addModule(TanhLayer(12, "hidden1"))
        n.addModule(BiasUnit(name="bias"))
        n.addOutputModule(LinearLayer(self.outdim, name='out'))
        n.addModule(LSTMLayer(8, False, "Memory"))
        n.addConnection(FullConnection(n['in'], n['hidden1']))
        n.addConnection(FullConnection(n['bias'], n['Memory']))
        n.addConnection(FullConnection(n['hidden1'], n['Memory']))
        n.addConnection(FullConnection(n['Memory'], n['hidden0']))
        n.addConnection(FullConnection(n['hidden0'], n['out']))
        n.addRecurrentConnection(FullConnection(n['hidden0'], n['Memory']))
        n.sortModules()
        self.brain = n

    def _faceDirection(self, direction):
        """Turn to face left (``direction`` -1) or right (+1) if facing the other way.

        On an actual turn the colour memory of the front sensors is cleared;
        the down and up rays (sensors 0 and 1) keep theirs.
        """
        if direction < 0:
            must_turn = self.look_at.x > 0
        else:
            must_turn = self.look_at.x < 0
        if not must_turn:
            return
        self.setLookAt(pymunk.Vec2d(direction, 0))
        for s in self.sensors[2:]:
            s.results[0:3] = 0  # clear color sensors

    def updateState(self, env, dt):
        """Advance the creature by ``dt`` seconds.

        Spends energy, keeps the body from spinning, picks the facing
        direction, applies the acceleration force, refreshes (and draws) all
        sensor rays, performs a jump if requested and possible, and finally
        refills ``inbuf``.
        """
        self.did_jump = False
        self.energy -= dt*self.decay
        # Keep it straight
        self.body.angular_velocity = 0.

        # Orientation: follow the actual horizontal movement; when nearly
        # standing still fall back to the sign of the requested acceleration.
        acc = self.acceleration
        velocity_x = self.body.velocity.x
        if velocity_x < -0.01:
            self._faceDirection(-1)
        elif velocity_x > 0.01:
            self._faceDirection(1)
        elif acc < 0:
            self._faceDirection(-1)
        else:
            self._faceDirection(1)

        # Acceleration
        self.body.reset_forces()
        force = pymunk.Vec2d(self.body.rotation_vector) * (self.max_force * acc)
        # A NaN can only be detected with isnan(); comparing against
        # float('nan') is always unequal and would let NaN forces through.
        if not (numpy.isnan(force.x) or numpy.isnan(force.y)):
            self.body.apply_force(force)

        # Update sensors; rays are mirrored vertically when facing left.
        flipy = self.look_at.x < 0
        for s in self.sensors:
            s.updateSegment(self.body.position, self.look_at, flipy)
            s.processSegment(env, self.shape.group)
            s.draw(env)  # NOTE(review): drawing happens as part of the update step

        # Jump: at most one every 0.3 s, and only when the down ray reports
        # something very close.
        if self.jump and self.jump_time > 0.3:
            if self.sensors[0].distance < 0.005*self.sensors[0].radius:
                self.body.apply_impulse(pymunk.Vec2d(0, self.jump_impulse))
                self.did_jump = True
                self.jump_time = 0
        self.jump_time += dt
        self._fillInBuf()

    def _fillInBuf(self):
        """Copy energy, horizontal velocity and sensor colours into ``inbuf``."""
        self.inbuf[0] = self.energy
        self.inbuf[1] = self.body.velocity.x
        for i, s in enumerate(self.sensors):
            base = 2 + i*4
            self.inbuf[base:base+3] = s.results[0:3]
            # NOTE(review): slot base+3 (sensor distance) is never written
            # here and therefore keeps whatever value inbuf already held.

    @property
    def color(self):
        """``fixed_color`` if set, else outputs 3..5 mapped from -1..1 to 0..1 and clipped."""
        if self.fixed_color is None:
            return tuple(numpy.clip((self.outbuf[3:6]+1.0)/2., 0., 1.))
        return self.fixed_color

    @property
    def acceleration(self):
        """Output 0 clipped to -1..1, as a plain float."""
        return float(numpy.clip(self.outbuf[0], -1., 1.))

    @acceleration.setter
    def acceleration(self, lr):
        self.outbuf[0] = lr

    @property
    def jump(self):
        """True when output 1 is at least 0.5 (a NaN output counts as no jump)."""
        return bool(self.outbuf[1] >= 0.5)

    @jump.setter
    def jump(self, j):
        self.outbuf[1] = 1.0 if j else -1.0
def collision_func(space, arbiter, env, methodname):
    """Forward a collision event to the receivers attached to the two shapes.

    For each of the two shapes in ``arbiter.shapes`` that has a
    ``collision_receiver`` attribute, and whose counterpart has an ``owner``
    attribute, the receiver's method called ``methodname`` is invoked with
    the counterpart's owner and a copy of ``arbiter.contacts``.

    Returns True when no receiver was called; otherwise the ``and``
    combination of the receivers' return values.  Once one receiver has
    returned a false value the second one is not called any more.
    """
    handle = True
    first, second = arbiter.shapes[0], arbiter.shapes[1]
    for receiving, other in ((first, second), (second, first)):
        if hasattr(receiving, "collision_receiver") and hasattr(other, "owner"):
            collidee = other.owner
            handle = handle and getattr(receiving.collision_receiver, methodname)(collidee, arbiter.contacts[:])
    # Single exit: a veto from the first receiver must survive even when the
    # second shape has no receiver.
    return handle
class CageEnvironment(object):
    """Physics space plus a view port that maps world points to a surface.

    World y points up (gravity is applied along negative y) whereas screen
    y points down, so the y axis is flipped when converting.  ``things``
    holds every embedded Thing in insertion order.
    """

    steps = 0

    def __init__(self, surface, gravity=9.81, xdim=200., focus=(0., 0.)):
        """Create the space and the view port.

        surface -- drawing surface; its width and height define the screen
        gravity -- magnitude of the downward gravity
        xdim    -- world width that should span the surface width
        focus   -- world point shown at the centre of the surface
        """
        # Per-instance list: a class-level list would be shared by every
        # environment ever created.
        self.things = []
        self.space = pymunk.Space()
        self.space.gravity = (0., -gravity)

        def on_begin(space, arbiter, env):
            return collision_func(space, arbiter, env, "receiveOnBegin")

        def on_separate(space, arbiter, env):
            return collision_func(space, arbiter, env, "receiveOnSeparate")

        # Only the begin and separate callbacks are hooked up, to limit the
        # number of callbacks; self is handed through as the env argument.
        self.space.add_collision_handler(0, 0, on_begin, None, None, on_separate, self)
        self.surface = surface
        self.setViewPort(xdim, focus)

    def setViewPort(self, xdim, focus):
        """Centre the view on ``focus``; rescale unless ``xdim`` is None."""
        if xdim is not None:
            self.scale = self.surface.get_width() / xdim
        self.offset_x = self.surface.get_width() / 2. - focus[0]*self.scale
        self.offset_y = self.surface.get_height() / 2. + focus[1]*self.scale

    def transformPoint2Screen(self, point):
        """Map a world point to screen coordinates (x, y)."""
        return point[0]*self.scale + self.offset_x, -point[1]*self.scale + self.offset_y

    def transformScreen2Point(self, screenpoint):
        """Map a screen point back to world coordinates.

        Exact inverse of transformPoint2Screen, including the y flip.
        """
        return (screenpoint[0]-self.offset_x) / self.scale, (self.offset_y-screenpoint[1]) / self.scale

    def transformDimension2Screen(self, dim):
        """Scale a world length to screen pixels."""
        return dim * self.scale

    def embedThing(self, th):
        """Register ``th`` and return its index in ``things``.

        Things usually embed themselves upon construction.  A thing that is
        already registered is not embedded again; its existing index is
        returned.
        """
        if th in self.things:
            return self.things.index(th)
        self.things.append(th)
        th.embedInEnv(self)
        return len(self.things)-1

    def removeThing(self, th):
        """Unregister ``th`` and let it remove itself from the space."""
        self.things.remove(th)
        th.removeFromEnv(self)

    def processTimeStep(self, timestep):
        """Update every thing's state, then step the physics by ``timestep``."""
        for thing in self.things:
            thing.updateState(self, timestep)
        self.space.step(timestep)

    def drawThings(self):
        """Ask every thing to draw itself."""
        for thing in self.things:
            thing.draw(self)
class TaskAvoidRed(TaskLiving):
    """Task whose reward drops with the amount of red the living senses.

    Once the reward exceeds 20 for the first time after a reset, the episode
    is cut down to ``samples_success_hysteresis`` further samples.
    """

    min_reward = 0.                  # lowest reward seen since the last reset
    success = False                  # set once the reward exceeded 20
    failure = False                  # only ever reset to False in this class
    samples_success_hysteresis = 15  # samples left to run after a success

    def getReward(self):
        """Return the reward for the current input buffer.

        Reads fixed positions of the living's ``inbuf``: indices 2, 6, 10,
        14 and 18 are each multiplied with the value three slots later
        (5, 9, 13, 17, 21).  The first product counts fully, the other four
        count half.  Also adjusts the living's energy and tracks
        ``min_reward``.
        """
        ib = self.living.inbuf
        # NOTE(review): assumes the Eater inbuf layout (red at 2 + 4*i,
        # distance slot at 2 + 4*i + 3); Eater._fillInBuf currently leaves
        # the distance slots unwritten - confirm this is intended.
        reds = ib[2] * ib[5] + (ib[6]*ib[9] + ib[10]*ib[13] + ib[14]*ib[17] + ib[18]*ib[21]) / 2
        e = -reds*2
        # Energy change, capped at +0.01 per call
        self.living.energy += (min(e/5000, 0.02))/2
        reward = e*30 + 25
        self.min_reward = min(reward, self.min_reward)
        if reward > 20 and not self.success:
            self.countdown()  # successful condition met, start counting
            self.failure = False
            self.success = True
        return reward

    def countdown(self):
        """Restart the sample counter with the short success horizon."""
        self.max_samples = self.samples_success_hysteresis
        self.samples = 0

    def reset(self):
        """Reset counters and flags; the horizon becomes 10000 samples."""
        TaskLiving.reset(self)
        self.success = False
        self.failure = False
        self.min_reward = 0.
        self.max_samples = 10000
from cage2 import CollisionReceiver
class TaskEat(TaskLiving, CollisionReceiver):
    """Task rewarding the living for touching Food.

    Registers itself as collision receiver on the living's shape and sets
    the living's ``decay`` to 0.01.
    """

    success = False
    failure = False

    def __init__(self, env, living):
        TaskLiving.__init__(self, env, living)
        CollisionReceiver.__init__(self, living.shape)
        self.living.decay = 0.01
        self.collected = 0.

    def receiveOnBegin(self, thing, contacts):
        """Handle the start of a collision between the living and ``thing``.

        Touching Food adds 0.1 energy, counts one collected item, shortens
        the episode via countdown() and returns False; the Food itself is
        not removed from the env.  Anything else returns True.
        """
        if isinstance(thing, Food):
            self.living.energy += 0.1
            self.collected += 1.
            self.countdown()
            return False
        return True

    def countdown(self):
        """Leave only 10 more samples in the current episode."""
        self.max_samples = 10
        self.samples = 0

    def getReward(self):
        """Return 100 * (100 * collected - jump penalty), then decay ``collected``.

        A penalty of 1 applies when the living jumped while its down sensor
        (sensor 0) was hitting Food.  ``collected`` is multiplied by 0.8 on
        every call.  A per-sensor bonus for seeing Food existed only as a
        disabled stub that added 0.0, so it is not computed.
        """
        reward = 0.0
        if isinstance(self.living.sensors[0].owner_hit, Food) and self.living.did_jump:
            reward -= 1
        reward += 100 * self.collected
        self.collected *= 0.8
        return reward*100.

    def reset(self):
        """Reset counters and flags; the horizon becomes 100000 samples."""
        TaskLiving.reset(self)
        self.success = False
        self.failure = False
        self.max_samples = 100000
class TaskAdoptColorDown(TaskLiving):
    """Task scoring inbuf[1] and the distance between the colour in
    inbuf[2:5] and the living's own colour.
    """

    # NOTE(review): TaskLiving.__init__ assigns self.max_samples, so this
    # class-level value appears to be shadowed on every instance - confirm.
    max_samples = 100

    def getReward(self):
        """Return (reward - error) * 50 for the current input buffer.

        inbuf[1] squared is compared against 0.5**2: below it an error term
        is added, otherwise the excess counts as reward.  The squared
        distance between the colour in inbuf[2:5] and ``living.color`` is
        added to the error when above 0.5**2 and to the reward otherwise.
        """
        ib = self.living.inbuf
        limit_sq = 0.5**2
        gain = 0
        loss = 0

        speed_sq = ib[1]**2
        if speed_sq < limit_sq:
            loss += (0.5*2-ib[1])**2 - limit_sq
        else:
            gain += speed_sq - limit_sq

        seen = (ib[2], ib[3], ib[4])  # color down
        own = self.living.color
        dc = (seen[0]-own[0])**2 + (seen[1]-own[1])**2 + (seen[2]-own[2])**2
        if dc > limit_sq:
            loss += dc
        else:
            gain += dc
        return (gain-loss)*50.