def reset(self):
    """Re-initialize the environment stochastically.

    The pole angle and the cart position are drawn uniformly at random;
    both velocities start at zero.
    """
    Environment.reset(self)
    theta = random.uniform(-0.2, 0.2)
    cart_pos = random.uniform(-1., 1.)
    # sensor layout: (theta, theta', s, s')
    self.sensors = (theta, 0.0, cart_pos, 0.0)
def getState(self):
    """Return the state one step (dt) ahead in the future.

    The state is kept in self.sensors because the next calculation
    needs it. The returned vector has 4 entries: theta, theta', s, s'
    (s being the distance from the origin).
    """
    Environment.getState(self)
    flat_state = asarray(self.sensors).flatten()
    return flat_state
def performAction(self, action):
    """Store the desired action for the next runge-kutta step.

    'actionNum' discrete actions are available; action index action[0]
    is mapped onto the evenly spaced interval [-50, 50] Newton.
    """
    num_actions = float(self.conditions['actionNum'])
    # map index 0 .. actionNum-1 linearly onto [-1, 1], then scale to Newton
    normalized = (2.0 * action[0] / (num_actions - 1.)) - 1.
    Environment.performAction(self, array([normalized * 50.]))
def __init__(self, topology, goal):
    """Set up the maze environment.

    :param topology: table describing the maze layout (walls/free cells)
    :param goal: goal position; it is removed from the set of possible
        starting positions so episodes never begin at the goal
    """
    Environment.__init__(self)
    self.mazeTable = topology
    self.goal = goal
    # 'is None' (identity), not '== None': equality can be overridden
    # and gives element-wise results for array-like values.
    if self.initPos is None:
        self.initPos = self._freePos()
    self.initPos.remove(self.goal)
    self.reset()
def __init__(self, maxSteps=100):
    """Set up the cart-pole environment in a random initial state.

    :param maxSteps: maximum number of steps per episode (default 100)
    """
    Environment.__init__(self)
    # initialize the environment (randomly)
    self.reset()
    self.generator = True
    self.centerCart = False
    self.maxSteps = maxSteps
    self.action = 0.0
def getReward(self):
    """Return the reward for the current state.

    +2 when the pole is (nearly) upright and, if centering is required,
    the cart is near the origin; 0 on failure (pole fallen over or cart
    off the track); otherwise at least +1, scaled up to reward being
    close to the center when self.centerCart is set.
    """
    Environment.getReward(self)
    theta = abs(self.sensors[0])
    dist = abs(self.sensors[2])
    if theta < 0.05 and (dist < 0.05 or not self.centerCart):
        return 2.
    if theta > 0.7 or dist > 2.4:
        return 0.
    if self.centerCart:
        return max(1., 2 * (1. - dist))
    return 1.
def reset(self):
    """ return to initial position (stochastically): """
    Environment.reset(self)
    # clear the collision/failure flag before picking a new start
    self.bang = False
    # agent position: one of the initial positions, chosen at random
    self.perseus = choice(self.initPos)
    # observation derived from the new position
    self.state = self._lookAround()
def reset(self):
    """Re-initialize the environment with a random state in [3, 4)."""
    Environment.reset(self)
    self.state = np.random.uniform(low=3, high=4)
def episodeFinished(self):
    """Return True once at least one step has been taken (single-step episodes)."""
    Environment.episodeFinished(self)
    return 1 <= self.timestep
def __init__(self):
    # set up base-class state, then draw a fresh initial state
    Environment.__init__(self)
    self.reset()
def reset(self):
    """Re-initialize: draw a random 1-element state in [-1, 1) and set
    the fixed target to 0.4."""
    Environment.reset(self)
    self.target = array([0.4])
    self.state = random.uniform(-1, 1, 1)
def resetToState(self, state):
    # reset the environment, then overwrite the sensors with the
    # caller-supplied state (expected in the same 4-element layout
    # as self.sensors -- TODO confirm with callers)
    Environment.reset(self)
    self.sensors = state
def performAction(self, action):
    """Store the desired action for the next runge-kutta step.

    Actions are expected to lie in [-50, 50] Newton; values outside
    that range are clipped to it.
    """
    bounded = clip(asarray(action), -50., 50.)
    Environment.performAction(self, bounded)