def rewardFunction(self, s, a, s_n):
    """Score the transition into ``s_n``.

    Args:
        s: Unused here; the previous state is read from ``self.prev['S']``.
        a: Action array; only its absolute values are used (regulariser).
        s_n: Next state, treated as a vector whose distance to the origin
            is the quantity being reduced.

    Returns:
        A 2-tuple ``(reward, flag)``. ``(1, 1)`` as soon as ``s_n`` is
        closer than .5 to the origin, otherwise the shaped reward with a
        flag of 0.
        NOTE(review): the flag looks like a terminal indicator -- confirm
        against the caller.
    """
    origin = np.zeros(s_n.shape)
    if dist(s_n, origin) < .5:
        return (1, 1)

    # The action regulariser is skipped when self.a is "argmax".
    if self.a != "argmax":
        action_penalty = .1 * np.sum(3 - np.abs(a))
    else:
        action_penalty = 0

    previous = self.prev['S']

    # Change in the magnitude of the first component of the normalised state.
    heading_gain = abs(unitVector(s_n)[0]) - abs(unitVector(previous)[0])

    # Positive when the new state is closer to the origin than the old one.
    progress = 10 * (
        dist(previous, np.zeros(previous.shape)) - dist(s_n, origin))

    return ((progress + heading_gain - action_penalty) / self.success, 0)
def checkPhase(self, s):
    """Return ``(reward, flag)`` for the active primitive given state ``s``.

    The checks depend on ``self.primitive``:

    * ``PUSH_IN_HOLE`` / ``CROSS``: ``(-3, 1)`` when ``self.rob_height``
      drops below .35.
    * ``PUSH_IN_HOLE``: once ``self.box_height`` is below .2, success
      ``(10 - 5 * d, 1)`` if the box/goal distance ``d`` is under .2 and
      ``self.hole_height > self.box_relative_height``; otherwise ``(-3, 1)``.
    * ``CROSS``: ``(5, 1)`` when ``s[4:6]`` is within .2 of the origin.
    * ``REORIENT``: ``(-3, 1)`` when ``|self.box_y| > .8``; success
      ``(5 - |self.box_y|, 1)`` when the heading error is under .15 or the
      box/goal distance is under .2.
    * ``PUSH_TOWARDS`` / ``SLOPE_PUSH``: ``(-2, 1)`` when ``|self.box_ori|``
      or ``|self.box_y|`` exceed the per-primitive limits; ``(5, 1)`` when
      the box/goal distance is under .2.

    Anything else yields ``(0, 0)``.

    Args:
        s: Indexable state; ``s[:2]`` is used as the box position, ``s[3]``
            as the current direction and ``s[4:6]`` as the goal position.

    Returns:
        2-tuple ``(reward, flag)``; the flag is 1 on every early exit above.
        NOTE(review): the flag is presumably a terminal indicator -- confirm
        with the caller.
    """
    primitive = self.primitive

    if primitive in ('PUSH_IN_HOLE', 'CROSS') and self.rob_height < .35:
        return (-3, 1)

    # (max |box_ori|, max |box_y|) tolerated by the two pushing primitives.
    push_limits = {
        'PUSH_TOWARDS': (.25, .35),
        'SLOPE_PUSH': (.6, .5),
    }

    if primitive == 'PUSH_IN_HOLE':
        if self.box_height < .2:
            d = dist(s[:2], s[4:6])
            print('DISTANCE: ', d)
            if d < .2 and self.hole_height > self.box_relative_height:
                return (10 - d * 5, 1)
            return (-3, 1)

    elif primitive == 'CROSS':
        d = dist(s[4:6], np.zeros(2))
        if d < .2:
            print('distance: ', d)
            return (5, 1)

    elif primitive == 'REORIENT':
        # Failure depends only on self state, so test it before doing any
        # geometry (this also avoids the division below on a doomed step).
        if abs(self.box_y) > .8:  # .2
            return (-3, 1)
        goal_vector = unitVector(s[4:6] - s[:2])
        # math.atan of y/x: result is confined to (-pi/2, pi/2).
        goal_direction = math.atan(goal_vector[1] / goal_vector[0])
        d = dist(s[:2], s[4:6])
        if abs(goal_direction - s[3]) < .15 or d < .2:
            return (5 - abs(self.box_y), 1)

    elif primitive in push_limits:
        # The original also computed a goal direction here but never used
        # it; that dead division by goal_vector[0] has been removed.
        max_ori, max_y = push_limits[primitive]
        if abs(self.box_ori) > max_ori or abs(self.box_y) > max_y:
            return (-2, 1)
        if dist(s[:2], s[4:6]) < .2:
            return (5, 1)

    return (0, 0)
def checkPhase(self, s):
    """Return ``(reward, flag)`` for the active primitive given state ``s``.

    The checks depend on ``self.primitive``:

    * ``PUSH_IN_HOLE`` / ``CROSS``: ``(-3, 1)`` when ``self.rob_height``
      drops below .35.
    * ``PUSH_IN_HOLE``: once ``self.box_height`` is below .2, success
      ``(10 - 5 * d, 1)`` if the box/goal distance ``d`` is under .2,
      otherwise ``(-3, 1)``.
    * ``CROSS``: ``(5, 1)`` when ``s[4:6]`` is within .2 of the origin.
    * ``REORIENT``: ``(-3, 1)`` when ``|self.box_y| > .1``; success
      ``(5 - 20 * |self.box_y|, 1)`` when the heading error is under .15.
    * ``PUSH_TOWARDS``: ``(-2, 1)`` when ``|self.box_ori| > .25`` or
      ``|self.box_y| > .35``; ``(5, 1)`` when the box/goal distance is
      under .2.

    Anything else yields ``(0, 0)``.

    Args:
        s: Indexable state; ``s[:2]`` is used as the box position, ``s[3]``
            as the current direction and ``s[4:6]`` as the goal position.

    Returns:
        2-tuple ``(reward, flag)``; the flag is 1 on every early exit above.
        NOTE(review): the flag is presumably a terminal indicator -- confirm
        with the caller.
    """
    primitive = self.primitive

    if primitive in ('PUSH_IN_HOLE', 'CROSS') and self.rob_height < .35:
        return (-3, 1)

    if primitive == 'PUSH_IN_HOLE':
        if self.box_height < .2:
            d = dist(s[:2], s[4:6])
            print('DISTANCE: ', d)
            if d < .2:
                # NOTE: we are training just the first phase
                # (get box into hole)
                return (10 - d * 5, 1)
            return (-3, 1)

    elif primitive == 'CROSS':
        d = dist(s[4:6], np.zeros(2))
        if d < .2:
            print('distance: ', d)
            return (5, 1)

    elif primitive == 'REORIENT':
        # Failure depends only on self state, so test it before doing any
        # geometry (this also avoids the division below on a doomed step).
        if abs(self.box_y) > .1:
            return (-3, 1)
        goal_vector = unitVector(s[4:6] - s[:2])
        # math.atan of y/x: result is confined to (-pi/2, pi/2).
        goal_direction = math.atan(goal_vector[1] / goal_vector[0])
        if abs(goal_direction - s[3]) < .15:
            return (5 - 20 * abs(self.box_y), 1)

    elif primitive == 'PUSH_TOWARDS':
        # The original also computed a goal direction here but never used
        # it; that dead division by goal_vector[0] has been removed.
        if abs(self.box_ori) > .25 or abs(self.box_y) > .35:
            return (-2, 1)
        if dist(s[:2], s[4:6]) < .2:
            return (5, 1)

    return (0, 0)
def rewardFunction(self, s_n, a):
    """Compute the two-agent shaped reward for the transition into ``s_n``.

    The state is split per agent with ``self.splitState`` and compared with
    the previous state held in ``self.prev['S']``.

    Phase 1:
        * ``(-3, 1)`` if either agent's ``pos[2]`` is below .35.
        * ``(5, 0)`` and ``self.phase`` advanced by one when the last
          component of the block position is below .3.
        * otherwise ``25 * box_r + vel_r + 2 * ori_r - .01`` where ``box_r``
          is the block's change along index 0 minus a small ``|blockOri|``
          penalty, ``vel_r`` is how much both agents closed in on the block
          and ``ori_r`` is how much both agents turned towards it.

    Phase 2:
        * ``(-6, 1)`` if either agent's ``pos[2]`` is below .35.
        * ``(5, 1)`` once both agents' ``pos[0]`` exceed .45.
        * otherwise progress of both agents along index 0, minus a penalty
          on their index-1 offset from the block, minus .01.

    Args:
        s_n: Next state; must support ``.ravel().tolist()``.
        a: Unused.

    Returns:
        2-tuple ``(reward, flag)``.
        NOTE(review): the flag is presumably a terminal indicator -- confirm
        with the caller.

    Raises:
        ValueError: if ``self.phase`` is neither 1 nor 2. Previously this
            path failed with an ``UnboundLocalError`` on ``r``.
    """

    def heading_gain(agent, block):
        """Change in alignment of the agent's heading with the block."""
        to_block = unitVector(block[:2] - agent.pos[:2])
        before = dot(unitVector(vector(agent.prevOri)), to_block)
        after = dot(unitVector(vector(agent.ori)), to_block)
        return after - before

    first, second = self.splitState(s_n.ravel().tolist())
    fPrev, sPrev = self.splitState(self.prev['S'].ravel().tolist())

    prevPos, pos, blockPos, prevBlock, ori, prevOri, blockOri = self.unpack(
        fPrev, first, double=True)
    first = Info(prevPos, pos, blockPos, prevBlock, ori, prevOri, blockOri)

    # The block-related values used below are the ones from this second
    # unpack, exactly as in the original code.
    prevPos, pos, blockPos, prevBlock, ori, prevOri, blockOri = self.unpack(
        sPrev, second, double=True)
    second = Info(prevPos, pos, blockPos, prevBlock, ori, prevOri, blockOri)

    if self.phase == 1:
        if first.pos[2] < .35 or second.pos[2] < .35:
            return (-3, 1)
        if blockPos[-1] < .3:
            self.phase += 1
            return (5, 0)

        box_r = (blockPos[0] - prevBlock[0]) - .005 * (abs(blockOri))
        vel_r = dist(first.prevPos, prevBlock) - dist(first.pos, blockPos)
        vel_r += dist(second.prevPos, prevBlock) - dist(second.pos, blockPos)
        ori_r = heading_gain(first, blockPos)
        ori_r += heading_gain(second, blockPos)
        return ((25 * box_r + vel_r + 2 * ori_r) - .01, 0)

    if self.phase == 2:
        if first.pos[2] < .35 or second.pos[2] < .35:
            return (-6, 1)
        if first.pos[0] > .45 and second.pos[0] > .45:
            print('Success!')
            return (5, 1)

        vel_r = first.pos[0] - first.prevPos[0]
        vel_r += second.pos[0] - second.prevPos[0]
        y_r = .1 * (abs(first.pos[1] - blockPos[1]) +
                    abs(second.pos[1] - blockPos[1]))
        return (vel_r - y_r - .01, 0)

    raise ValueError(
        'rewardFunction only supports phase 1 or 2, got %r' % (self.phase,))
def getAux(self, pos, prevPos, blockPos, prevBlock, ori, prevOri, phase):
    """Return the per-agent shaped reward ``(reward, 0)`` for ``phase``.

    Phase 1 combines three terms, scaled by ``self.w_phase1``: the block's
    change along index 0, how much the agent closed in on the block, and
    twice the improvement in alignment between the agent's heading and the
    direction to the block.

    Phase 2 rewards progress towards the point
    ``[.80, blockPos[1], pos[2]]``, penalises the index-1 offset from the
    block and ``|ori|``, and scales by ``self.w_phase3``.
    NOTE(review): phase 2 uses ``w_phase3`` -- confirm this is intended.

    Both phases subtract a constant .01. Any other ``phase`` returns None.
    """
    if phase == 1:
        approach = dist(prevPos, blockPos) - dist(pos, blockPos)
        block_advance = blockPos[0] - prevBlock[0]

        to_block = unitVector(blockPos[:2] - pos[:2])
        alignment_before = dot(unitVector(vector(prevOri)), to_block)
        alignment_now = dot(unitVector(vector(ori)), to_block)
        turn_gain = alignment_now - alignment_before

        shaped = block_advance + approach + 2 * turn_gain
        return (shaped * self.w_phase1 - .01, 0)

    if phase == 2:
        target = np.array([.80, blockPos[1], pos[2]])
        closing = dist(prevPos, target) - dist(pos, target)
        lateral = -abs(blockPos[1] - pos[1])

        shaped = closing + .15 * lateral - .05 * abs(ori)
        return (shaped * self.w_phase3 - .01, 0)

    return None
def get_goal_angle(self, state, display=False):
    """Return the angle to the hole in the box's rotated frame, or 0.

    The unit vector from ``state[:2]`` (box) to ``state[5:7]`` (hole) is
    rotated by ``-state[4]``. Its angle is returned only when the rotated
    vector's y component exceeds .3 in magnitude; otherwise 0 is returned.

    Args:
        state: Indexable state vector (indices 0-1, 4 and 5-6 are read).
        display: Unused.

    Returns:
        The wrapped angle, or 0 when ``|y| <= .3``.
        NOTE(review): a vector pointing straight away from the hole also
        has a small y and therefore yields 0 -- confirm that is intended.
    """
    heading = unitVector(state[5:7] - state[:2])

    theta = -state[4]
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotate = np.array([[cos_t, -sin_t],
                       [sin_t, cos_t]])
    heading = np.dot(rotate, heading.reshape((-1, 1))).flatten()

    # orientation() returns the inverse tangent of y / x, i.e. a value in
    # -pi/2..pi/2; it must be widened to -pi..pi for v-rep comparison.
    ori = orientation(heading)
    relative_x, relative_y = heading[0], heading[1]
    if relative_x < 0:
        offset = -np.pi if relative_y < 0 else np.pi
    else:
        offset = 0
    ori = ori + offset

    if abs(relative_y) > .3:
        # Not pointing in the general correct direction.
        return ori
    # Otherwise you're fine.
    return 0
def getAngles(self, s):
    """Compute the steering angle pairs used by the primitives.

    Every pair is ``(angle, mirrored)`` where ``angle`` is
    ``arctan(relative_y / relative_x)`` widened to -pi..pi and ``mirrored``
    is ``-pi - angle`` for negative angles, ``pi - angle`` otherwise.

    Args:
        s: State array; flattened with ``ravel``. Indices 0-1 are used as
            the box position, 3 as the box rotation and 4-5 as the
            hole/goal position.

    Returns:
        A 5-tuple ``(goal_angles, align_y_angles, cross_angles, left_angle,
        right_angle)``. ``align_y_angles`` carries a third element: the
        distance of the aligned point from the origin.

    Raises:
        ValueError: if ``self.primitive`` is not one of ``PUSH_IN_HOLE``,
            ``REORIENT``, ``PUSH_TOWARDS`` or ``CROSS``. Previously this
            path failed with an ``UnboundLocalError``.
    """

    def angle_pair(relative_y, relative_x):
        """Wrap arctan(y / x) to -pi..pi and pair it with its mirror."""
        # np.arctan alone only covers -pi/2..pi/2; shift by +/-pi when the
        # point lies behind (x < 0), since we want to map -pi to pi.
        buff = (-np.pi if relative_y < 0 else np.pi) if relative_x < 0 else 0
        angle = np.arctan(relative_y / relative_x) + buff
        mirrored = -np.pi - angle if angle < 0 else np.pi - angle
        return (angle, mirrored)

    s = s.ravel()
    goal = self.goal  # This is relative position of box w.r.t. the robot

    # In every call below the first coordinate is passed as "y" and the
    # negated second coordinate as "x", mirroring the original convention.
    if self.primitive in ('PUSH_IN_HOLE', 'REORIENT', 'PUSH_TOWARDS'):
        goal_angles = angle_pair(goal[0], -goal[1])
    elif self.primitive == 'CROSS':
        goal_angles = angle_pair(s[4], -s[5])
    else:
        raise ValueError(
            'getAngles does not support primitive %r' % (self.primitive,))

    # NOTE: Depending on the primitive, these all reference the box and
    # some other point past it as well.
    hole = s[4:6]
    axis = unitVector(s[:2] - hole)
    # Hole position with its component along the hole->box axis removed.
    aligned = hole - dot(hole, axis) * axis
    align_y_angles = angle_pair(aligned[0], -aligned[1]) + (
        dist(aligned, np.zeros(2)),)

    cross_angles = angle_pair(s[4], -s[5])

    pos = s[:2]
    psi = s[3]
    rotation_matrix = np.array([[np.cos(psi), -np.sin(psi)],
                                [np.sin(psi), np.cos(psi)]])

    def contact_angle(side):
        """Angle pair towards the contact point on the given box side."""
        goal_relative_to_box = np.array([self.x_contact, self.contact[side]])
        home = pos + rotation_matrix.dot(goal_relative_to_box)
        return angle_pair(home[0], -home[1])

    left_angle = contact_angle('left')
    right_angle = contact_angle('right')

    return goal_angles, align_y_angles, cross_angles, left_angle, right_angle
def getAngles(self, s):
    """Compute per-robot steering data and advance transitional phases.

    Side effect: while ``self.phase`` is 1 or 3, the phase is advanced
    (modulo 4) and printed once every robot is within .1 of its contact
    point for the current phase.

    Args:
        s: Raw state, split into per-entity entries by ``self.splitState``
            (keys ``'box'`` and ``'robot<i>'`` are read).

    Returns:
        Dict mapping ``'robot<i>'`` to
        ``(alpha, beta, phi, direction * box_ori, tilt)``.
    """
    states = self.splitState(s)
    box_ori = states['box'][2]

    if self.phase in (1, 3):  # these are transitioning phases
        # NOTE(review): x_contact is indexed [agent][phase] while y_contact
        # is indexed [phase][agent] -- confirm the layouts really differ.
        too_far = [
            dist(states["robot" + str(i)][:2],
                 np.array([self.x_contact[i][self.phase],
                           self.y_contact[self.phase][i]])) > .1
            for i in range(self.num_agents)
        ]
        if not any(too_far):
            self.phase = (self.phase + 1) % 4
            print('PHASE', self.phase)

    angles = {}
    for i in range(self.num_agents):
        name = 'robot' + str(i)
        curr = states[name]
        pos = curr[:2]
        y_contact = self.y_contact[self.phase][i]

        # Calculating angle for STRAIGHT_BOX and CHANGE_ANGLE
        phi = curr[2] - box_ori
        if y_contact == 0:
            # Special case: if you're smack in the middle, you should go
            # back if either side is uneven
            direction = np.sign(box_ori)
        elif y_contact > 0:
            direction = -1
        else:
            direction = 1

        phi_trans = phi - np.pi / 2
        if phi_trans < -np.pi:
            phi_trans = phi_trans + 2 * np.pi

        # Vector from current position to contact point, all relative to box
        goal = np.array([self.x_contact[i][self.phase], y_contact]) - pos
        goal_dir = unitVector(goal)
        front_v = vector(phi_trans)
        right_v = vector(phi_trans - np.pi / 2)

        # Calculating angles for HOME_CONTACT
        relative_y = -dot(goal_dir, right_v)  # negative for convention
        relative_x = dot(goal_dir, front_v)
        if relative_x < 0:
            # since we want to map -pi to pi
            buff = -np.pi if relative_y < 0 else np.pi
        else:
            buff = 0
        alpha = np.arctan(relative_y / relative_x) + buff
        beta = -np.pi - alpha if alpha < 0 else np.pi - alpha

        # Boolean to determine need for translation adjustment, i.e. if the
        # box is too far in, tilt it towards you.
        # ratio of horizontal and vertical distance from contact point
        ratio = abs(goal[1] / goal[0])
        tilt = bool(ratio > .8
                    and np.sign(goal[1] * y_contact) < 0
                    and abs(goal[0]) < .4)

        angles[name] = (alpha, beta, phi, direction * box_ori, tilt)

    return angles