# Variant with a 3-D state (position, velocity, collected-flag index) and
# Gaussian potential-based shaping toward the current flag.
def step(self, a):
    """
    Take acceleration action *a*, adding noise as specified in
    ``__init__()``.
    """
    s = self.state
    position, velocity, cflag = self.state
    noise = self.accelerationFactor * self.noise * \
        2 * (self.random_state.rand() - .5)
    velocity += (noise +
                 self.actions[a] * self.accelerationFactor +
                 np.cos(self.hillPeakFrequency * position) * self.gravityFactor)
    velocity = bound(velocity, self.XDOTMIN, self.XDOTMAX)
    position += velocity
    position = bound(position, self.XMIN, self.XMAX)
    if position <= self.XMIN and velocity < 0:
        velocity = 0  # Bump into wall

    r = self.STEP_REWARD

    # ---- calculate the reward and update the last dimension of state ----
    # ### A. abs(distance): reward shrinking the absolute distance to the flag
    # if cflag < self.FlagNum:
    #     dist_s = np.absolute(s[0] - self.FlagPos[int(cflag)])
    #     dist_ns = np.absolute(position - self.FlagPos[int(cflag)])
    #     if dist_ns < dist_s:
    #         r += self.FlagHeight[int(cflag)] / dist_s
    #     # ---- if cflag is updated ----
    #     ### (2) the agent is near the flag
    #     if np.absolute(self.FlagPos[int(cflag)] - position) <= 0.1:
    #         cflag += 1
    #         # print('collect flag ', cflag)
    #     ### (1) the agent must pass the flag
    #     # p_low, p_high = min(s[0], position), max(s[0], position)
    #     # if p_low <= self.FlagPos[int(cflag)] <= p_high:
    #     #     cflag += 1

    ### B. sqr(distance): Gaussian potential centered on the current flag
    if cflag < self.FlagNum:
        distance_s = s[0] - self.FlagPos[int(cflag)]
        exponent_s = 0.5 * distance_s ** 2 / self.FlagWid[int(cflag)]
        phi_s = self.FlagHeight[int(cflag)] * np.exp(-exponent_s)

        distance_ns = position - self.FlagPos[int(cflag)]
        exponent_ns = 0.5 * distance_ns ** 2 / self.FlagWid[int(cflag)]
        phi_ns = self.FlagHeight[int(cflag)] * np.exp(-exponent_ns)

        # potential-based shaping term: gamma * phi(s') - phi(s)
        r += self.discount_factor * phi_ns - phi_s

        # the flag is collected once the agent passes it in either direction
        p_low, p_high = min(s[0], position), max(s[0], position)
        if p_low <= self.FlagPos[int(cflag)] <= p_high:
            cflag += 1

    ns = np.array([position, velocity, int(cflag)])
    self.collectedFlags = int(cflag)
    self.state = ns.copy()
    terminal = self.isTerminal()
    if terminal:
        r += self.GOAL_REWARD
    return r, ns, terminal, self.possibleActions()
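# A minimal, self-contained sketch of the Gaussian potential-based shaping used
# in variant B above. The names `flag_pos`, `flag_wid`, `flag_height`, and
# `gamma` are illustrative stand-ins for the instance attributes; only the
# formula is taken from the code. The bonus gamma * phi(s') - phi(s) rewards
# moving up the potential toward the current flag and, being potential-based,
# leaves the optimal policy unchanged (Ng et al., 1999).

import numpy as np

def gaussian_potential(position, flag_pos, flag_wid, flag_height):
    """Bell-shaped potential peaking at the flag position."""
    return flag_height * np.exp(-0.5 * (position - flag_pos) ** 2 / flag_wid)

def shaping_bonus(pos, next_pos, flag_pos, flag_wid, flag_height, gamma=0.99):
    """Potential-based shaping term gamma * phi(s') - phi(s)."""
    phi_s = gaussian_potential(pos, flag_pos, flag_wid, flag_height)
    phi_ns = gaussian_potential(next_pos, flag_pos, flag_wid, flag_height)
    return gamma * phi_ns - phi_s

# Moving toward the flag yields a positive bonus, moving away a negative one:
# shaping_bonus(-0.5, -0.4, flag_pos=0.0, flag_wid=0.1, flag_height=1.0) > 0
# shaping_bonus(-0.4, -0.5, flag_pos=0.0, flag_wid=0.1, flag_height=1.0) < 0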
# Variant with flags placed in the full (position, velocity) plane; the active
# reward is L1-distance based, with a Gaussian-shaping alternative kept below
# in comments.
def step(self, a):
    """
    Take acceleration action *a*, adding noise as specified in
    ``__init__()``.
    """
    s = self.state
    position, velocity, cflag = self.state
    noise = self.accelerationFactor * self.noise * \
        2 * (self.random_state.rand() - .5)
    velocity += (noise +
                 self.actions[a] * self.accelerationFactor +
                 np.cos(self.hillPeakFrequency * position) * self.gravityFactor)
    velocity = bound(velocity, self.XDOTMIN, self.XDOTMAX)
    position += velocity
    position = bound(position, self.XMIN, self.XMAX)
    if position <= self.XMIN and velocity < 0:
        velocity = 0  # Bump into wall

    # ---- calculate the reward and update the last dimension of state ----
    r = self.STEP_REWARD
    if cflag < self.FlagNum:
        # reward moving closer to the current flag in L1 distance over
        # (position, velocity)
        dist_s = np.sum(np.absolute(s[0:-1] - self.FlagPos[int(cflag)]))
        ns_pv = np.array([position, velocity])
        dist_ns = np.sum(np.absolute(ns_pv - self.FlagPos[int(cflag)]))
        if dist_ns < dist_s:
            r += self.FlagHeight[int(cflag)] / dist_s / 100

        # the flag is collected when it falls inside the axis-aligned box
        # spanned by the previous and current (position, velocity) pairs
        p_low, p_high = min(s[0], position), max(s[0], position)
        v_low, v_high = min(s[1], velocity), max(s[1], velocity)
        if np.all(self.FlagPos[int(cflag)] >= np.array([p_low, v_low])) and \
                np.all(self.FlagPos[int(cflag)] <= np.array([p_high, v_high])):
            cflag += 1
            # print('collect flag ', cflag)

    # ### Gaussian-shaping alternative over (position, velocity)
    # if cflag < self.FlagNum:
    #     exponent_s = np.divide(
    #         np.sum(0.5 * (s[0:-1] - self.FlagPos[int(cflag)]) ** 2),
    #         self.FlagWid[int(cflag)])
    #     phi_s = np.multiply(self.FlagHeight[int(cflag)], np.exp(-exponent_s))
    #     ns_pv = np.array([position, velocity])
    #     exponent_ns = np.divide(
    #         np.sum(0.5 * (ns_pv - self.FlagPos[int(cflag)]) ** 2),
    #         self.FlagWid[int(cflag)])
    #     phi_ns = np.multiply(self.FlagHeight[int(cflag)], np.exp(-exponent_ns))
    #     r += self.discount_factor * phi_ns - phi_s
    #
    #     p_low, p_high = min(s[0], position), max(s[0], position)
    #     v_low, v_high = min(s[1], velocity), max(s[1], velocity)
    #     if np.all(self.FlagPos[int(cflag)] >= np.array([p_low, v_low])) and \
    #             np.all(self.FlagPos[int(cflag)] <= np.array([p_high, v_high])):
    #         cflag += 1
    #     # nearness test alternative:
    #     # if np.absolute(position - self.FlagPos[int(cflag), 0]) <= 0.1 and \
    #     #         np.absolute(velocity - self.FlagPos[int(cflag), 1]) <= 0.02:
    #     #     cflag += 1

    ns = np.array([position, velocity, int(cflag)])
    self.collectedFlags = int(cflag)
    self.state = ns.copy()
    terminal = self.isTerminal()
    if terminal:
        r += self.GOAL_REWARD
    return r, ns, terminal, self.possibleActions()
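# The 2-D flag test above, pulled out as a standalone sketch for clarity. The
# helper name `flag_crossed` is illustrative, not part of the original code:
# a flag (a point in (position, velocity) space) counts as collected when it
# lies inside the axis-aligned box spanned by consecutive states.

import numpy as np

def flag_crossed(prev_pv, next_pv, flag_pv):
    """True if flag_pv falls in the box spanned by prev_pv and next_pv."""
    low = np.minimum(prev_pv, next_pv)
    high = np.maximum(prev_pv, next_pv)
    return bool(np.all(flag_pv >= low) and np.all(flag_pv <= high))

# Example: a flag at (-0.2, 0.01) is collected on a step from (-0.3, 0.0) to
# (-0.1, 0.02), but not on one that only brackets it in position:
# flag_crossed(np.array([-0.3, 0.0]),  np.array([-0.1, 0.02]), np.array([-0.2, 0.01]))  # True
# flag_crossed(np.array([-0.3, 0.03]), np.array([-0.1, 0.05]), np.array([-0.2, 0.01]))  # False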
# Baseline dynamics without flags: goal reward on termination, step cost
# otherwise.
def step(self, a):
    """
    Take acceleration action *a*, adding noise as specified in
    ``__init__()``.
    """
    position, velocity = self.state
    noise = self.accelerationFactor * self.noise * \
        2 * (self.random_state.rand() - .5)
    velocity += (noise +
                 self.actions[a] * self.accelerationFactor +
                 np.cos(self.hillPeakFrequency * position) * self.gravityFactor)
    velocity = bound(velocity, self.XDOTMIN, self.XDOTMAX)
    position += velocity
    position = bound(position, self.XMIN, self.XMAX)
    if position <= self.XMIN and velocity < 0:
        velocity = 0  # Bump into wall
    ns = np.array([position, velocity])
    self.state = ns.copy()
    terminal = self.isTerminal()
    r = self.GOAL_REWARD if terminal else self.STEP_REWARD
    return r, ns, terminal, self.possibleActions()
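# Every variant calls a `bound` helper that is not shown in this section. A
# minimal sketch consistent with its call sites (clamp a scalar into a closed
# interval); the actual implementation may differ:

def bound(x, minimum, maximum):
    """Clamp x into the closed interval [minimum, maximum]."""
    return min(max(x, minimum), maximum)

# e.g. with the classic MountainCar range XMIN = -1.2, XMAX = 0.6,
# bound(1.5, -1.2, 0.6) returns 0.6 and bound(-2.0, -1.2, 0.6) returns -1.2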
# Variant with scalar (position-only) flags: Gaussian potential shaping plus a
# crossing test for flag collection.
def step(self, a):
    """
    Take acceleration action *a*, adding noise as specified in
    ``__init__()``.
    """
    s = self.state
    position, velocity, cflag = self.state
    noise = self.accelerationFactor * self.noise * \
        2 * (self.random_state.rand() - .5)
    velocity += (noise +
                 self.actions[a] * self.accelerationFactor +
                 np.cos(self.hillPeakFrequency * position) * self.gravityFactor)
    velocity = bound(velocity, self.XDOTMIN, self.XDOTMAX)
    position += velocity
    position = bound(position, self.XMIN, self.XMAX)
    if position <= self.XMIN and velocity < 0:
        velocity = 0  # Bump into wall

    # calculate the reward and update the last dimension of state
    r = self.STEP_REWARD
    if cflag < self.FlagNum:
        distance_s = s[0] - self.FlagPos[int(cflag)]
        exponent_s = 0.5 * distance_s ** 2 / self.FlagWid[int(cflag)]
        phi_s = self.FlagHeight[int(cflag)] * np.exp(-exponent_s)

        distance_ns = position - self.FlagPos[int(cflag)]
        exponent_ns = 0.5 * distance_ns ** 2 / self.FlagWid[int(cflag)]
        phi_ns = self.FlagHeight[int(cflag)] * np.exp(-exponent_ns)

        # potential-based shaping term: gamma * phi(s') - phi(s)
        r += self.discount_factor * phi_ns - phi_s

        # the flag is collected once the agent crosses it in either direction
        if (position >= self.FlagPos[int(cflag)] >= s[0]) or \
                (position <= self.FlagPos[int(cflag)] <= s[0]):
            cflag += 1

    ns = np.array([position, velocity, int(cflag)])
    self.collectedFlags = int(cflag)
    self.state = ns.copy()
    terminal = self.isTerminal()
    if terminal:
        r += self.GOAL_REWARD
    return r, ns, terminal, self.possibleActions()
# Variant that tracks flag progress on the environment (self.collectedFlags)
# rather than in the state vector.
def step(self, a):
    """
    Take acceleration action *a*, adding noise as specified in
    ``__init__()``.
    """
    s = self.state
    position, velocity = self.state
    noise = self.accelerationFactor * self.noise * \
        2 * (self.random_state.rand() - .5)
    velocity += (noise +
                 self.actions[a] * self.accelerationFactor +
                 np.cos(self.hillPeakFrequency * position) * self.gravityFactor)
    velocity = bound(velocity, self.XDOTMIN, self.XDOTMAX)
    position += velocity
    position = bound(position, self.XMIN, self.XMAX)
    if position <= self.XMIN and velocity < 0:
        velocity = 0  # Bump into wall

    # update the state before the terminal check so isTerminal() sees the
    # new state (the original evaluated it against the stale one)
    ns = np.array([position, velocity])
    self.state = ns.copy()
    terminal = self.isTerminal()

    if terminal:
        r = self.GOAL_REWARD
    else:
        collected = self.collectedFlags
        if collected < self.FlagNum:
            distance_s = s[0] - self.FlagPos[collected]
            exponent_s = 0.5 * distance_s ** 2 / self.FlagWid[collected]
            phi_s = self.FlagHeight[collected] * np.exp(-exponent_s)

            distance_ns = ns[0] - self.FlagPos[collected]
            exponent_ns = 0.5 * distance_ns ** 2 / self.FlagWid[collected]
            phi_ns = self.FlagHeight[collected] * np.exp(-exponent_ns)

            # step cost plus potential-based shaping toward the current flag
            r = self.STEP_REWARD + self.discount_factor * phi_ns - phi_s

            # the flag is collected once the agent crosses it
            if (ns[0] >= self.FlagPos[collected] >= s[0]) or \
                    (ns[0] <= self.FlagPos[collected] <= s[0]):
                self.collectedFlags += 1
        else:
            r = self.STEP_REWARD
    return r, ns, terminal, self.possibleActions()
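# A hedged usage sketch showing how the 4-tuple returned by step() is
# typically consumed in a rollout loop. The `rollout` helper and the policy
# signature are hypothetical; only the return shape
# (r, ns, terminal, possibleActions) and the `state`/`possibleActions`
# attributes are taken from the code above.

import numpy as np

def rollout(domain, policy, max_steps=1000, seed=0):
    """Run one episode, returning the undiscounted sum of rewards."""
    rng = np.random.RandomState(seed)
    total = 0.0
    for _ in range(max_steps):
        a = policy(domain.state, domain.possibleActions(), rng)
        r, ns, terminal, actions = domain.step(a)
        total += r
        if terminal:
            break
    return total

# e.g. a uniformly random policy:
# random_policy = lambda s, actions, rng: actions[rng.randint(len(actions))]
# total_return = rollout(domain, random_policy)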