-
Notifications
You must be signed in to change notification settings - Fork 2
/
reward.py
76 lines (56 loc) · 2.63 KB
/
reward.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
__author__ = 'manuelli'
import utils
import director.objectmodel as om
import numpy as np
import matplotlib.pyplot as plt
class Reward(object):
def __init__(self, sensorObj=None, collisionThreshold=None, collisionPenalty=100.0,
actionCost=0.1, raycastCost=20.0):
if sensorObj is None or collisionThreshold is None:
ValueError("need to specify sensorObj, actionSet and collisionThreshold")
self.numRays = sensorObj.numRays
self.rayLength = sensorObj.rayLength
self.sensorObj = sensorObj
self.collisionThreshold = collisionThreshold
self.collisionPenalty = collisionPenalty
self.actionCost = actionCost
self.raycastCost = raycastCost
self.initializeRaycastRewardWeights()
self.largeConstant = 1e5*1.0
self.tol = 1e-3
self.cutoff = 20
def initializeRaycastRewardWeights(self):
self.raycastRewardWeights = np.cos(self.sensorObj.angleGrid) + 0.2
self.raycastRewardWeights = -self.raycastCost*self.raycastRewardWeights/self.raycastRewardWeights.sum()
def checkInCollision(self, raycastDistance):
if np.min(raycastDistance) < self.collisionThreshold:
return True
else:
return False
def computeReward(self, S, u):
carState, raycastDistance = S
if self.checkInCollision(raycastDistance):
return -self.collisionPenalty
reward = -self.actionCost*np.linalg.norm(u)
reward += self.computeRaycastReward(S, u)
return reward
def computeRaycastReward(self, S, u):
carState, raycastDistance = S
# raycastAdjusted = raycastDistance - self.collisionThreshold
# maxRangeIdx = np.where(raycastDistance > self.rayLength - self.tol)
# raycastAdjusted[maxRangeIdx] = self.largeConstant
#
# raycastAdjusted = self.setMaxRangeToLargeConstant()
inverseTruncated = utils.inverseTruncate(raycastDistance, self.cutoff, rayLength=self.rayLength,
collisionThreshold=self.collisionThreshold)
return np.dot(self.raycastRewardWeights, inverseTruncated)
def computeRewardFromFrameLocation(self, u=0.0):
carFrame = om.findObjectByName('robot frame')
raycastDistance = self.sensorObj.raycastAll(carFrame)
carState = 0.0 # just a placeholder for now
S = (carState, raycastDistance)
return self.computeReward(S, u)
def plotRaycastRewardWeights(self):
grid = np.arange(0,self.sensorObj.numRays) - self.sensorObj.numRays/2.0
plt.plot(grid, self.raycastRewardWeights)
plt.show()