Example #1
    def rewardFunction(self, s_n, a):
        first, second = self.splitState(s_n.ravel().tolist())
        fPrev, sPrev = self.splitState(self.prev['S'].ravel().tolist())

        prevPos, pos, blockPos, prevBlock, ori, prevOri, blockOri = self.unpack(
            fPrev, first, double=True)
        first = Info(prevPos, pos, blockPos, prevBlock, ori, prevOri, blockOri)

        prevPos, pos, blockPos, prevBlock, ori, prevOri, blockOri = self.unpack(
            sPrev, second, double=True)
        second = Info(prevPos, pos, blockPos, prevBlock, ori, prevOri,
                      blockOri)

        # NOTE: the loose blockPos / prevBlock / blockOri used below are the
        # values left over from the second unpack above.
        if self.phase == 1:
            if first.pos[2] < .35 or second.pos[2] < .35:
                return (-3, 1)
            if blockPos[-1] < .3:
                self.phase += 1
                return (5, 0)
            box_r = (blockPos[0] - prevBlock[0]) - .005 * (abs(blockOri))

            vel_r = dist(first.prevPos, prevBlock) - dist(first.pos, blockPos)
            vel_r += dist(second.prevPos, prevBlock) - dist(
                second.pos, blockPos)

            prevVec = unitVector(vector(first.prevOri))
            vec = unitVector(vector(first.ori))
            goal = unitVector(blockPos[:2] - first.pos[:2])
            prevDot = dot(prevVec, goal)
            currDot = dot(vec, goal)
            ori_r = currDot - prevDot

            prevVec = unitVector(vector(second.prevOri))
            vec = unitVector(vector(second.ori))
            goal = unitVector(blockPos[:2] - second.pos[:2])
            prevDot = dot(prevVec, goal)
            currDot = dot(vec, goal)
            ori_r += currDot - prevDot

            r = (25 * box_r + vel_r + 2 * ori_r) - .01
        if self.phase == 2:
            if first.pos[2] < .35 or second.pos[2] < .35:
                return (-6, 1)
            if first.pos[0] > .45 and second.pos[0] > .45:
                print('Success!')
                return (5, 1)

            vel_r = first.pos[0] - first.prevPos[0]
            vel_r += second.pos[0] - second.prevPos[0]

            y_r = .1 * (abs(first.pos[1] - blockPos[1]) +
                        abs(second.pos[1] - blockPos[1]))

            #ori_r = .05* (abs(first.ori) + abs(second.ori))

            r = vel_r - y_r - .01
        return (r, 0)
 def succeeded(self, s):
     if self.simulation_name == 'elevated_scene':
         return dist(
             s[:3], s[5:8]
         ) < .5 and self.box_z_global < .2 and self.bot_z_global > .3
     if self.simulation_name == 'flat_scene':
         return dist(s[:3], s[5:8]) < .4
     if self.simulation_name == 'slope_scene':
         return dist(s[:3], s[5:8]) < .4
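
These snippets all lean on a few small geometry helpers (dist, unitVector, vector, dot) whose definitions are not shown. Below is a minimal sketch of what they are assumed to do, for reference only; the originals may differ in detail.

import numpy as np

def dist(a, b):
    # Euclidean distance between two array-like points
    return float(np.linalg.norm(np.asarray(a, dtype=float) - np.asarray(b, dtype=float)))

def unitVector(v):
    # Normalize a vector; return the zero vector unchanged to avoid division by zero
    v = np.asarray(v, dtype=float)
    n = np.linalg.norm(v)
    return v if n == 0 else v / n

def vector(angle):
    # Planar unit vector pointing along `angle` (radians)
    return np.array([np.cos(angle), np.sin(angle)])

def dot(a, b):
    # Plain dot product
    return float(np.dot(a, b))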
Example #3
    def reward_function(self, s):
        s = s.ravel()
        succeeded = self.succeeded(s)
        _, done = self.decide_to_restart(s)

        if succeeded:
            if self.simulation_name == 'elevated_scene':
                return 5 - dist(s[:2], s[5:7])
            if self.simulation_name == 'flat_scene':
                return 5 - abs(self.box_ori_global)
            if self.simulation_name == 'slope_scene':
                return 5 - abs(self.box_ori_global)
        if done and not succeeded:
            if self.simulation_name == 'elevated_scene' and (self.box_z_global < .2 and self.bot_z_global > .2):
                return 0
            else:
                return -5
        else:
            if type(self.prev["S"]) != np.ndarray:
                return 0
            previous_local_state = self.prev['S'].ravel()

            dist_state = 2 if self.simulation_name == 'elevated_scene' else 3
            min_dist = .5 if self.simulation_name == 'elevated_scene' else 1
            previous_distance = dist(previous_local_state[0: dist_state], previous_local_state[5: 5 + dist_state])
            curr_distance = dist(s[:dist_state], s[5: 5 + dist_state])
            d_reward = previous_distance - curr_distance

            prev_ori = self.get_goal_angle(previous_local_state)
            curr_ori = self.get_goal_angle(s, display=True)
            ori_reward = prev_ori - curr_ori if abs(s[3]) < .01 and curr_distance > min_dist else 0  # this is to keep certain calculations correct

            """prev_box_from_hole = previous_local_state[:2] - previous_local_state[5:7]
            hole = s[5:7]
            aligned = hole - dot(hole, unitVector(prev_box_from_hole)) * unitVector(prev_box_from_hole)
            prev_align = dist(np.zeros(2), aligned)
            box_from_hole = s[:2] - s[5:7]
            hole = s[5:7]
            aligned = hole - dot(hole, unitVector(box_from_hole)) * unitVector(box_from_hole)
            curr_align = dist(np.zeros(2), aligned)
            align_reward = prev_align - curr_align"""

            """prev_distance_to_box = dist(np.zeros(3), previous_local_state[:3])
            distance_to_box = dist(np.zeros(3), s[:3])
            box_reward = prev_distance_to_box - distance_to_box"""

            """if self.prev_action_was_valid:
                return -.05
            else:
                return -.3"""
            if self.prev_action_was_valid:
                return 3 * np.round(.5 * np.round(d_reward, 2) + .5 * np.round(ori_reward, 2), 3) - .1
            else:
                return -.3
Example #4
 def succeeded(self, s):
     assert type(self.simulation_name) == str
     if self.has_box_in_simulation:
         if self.simulation_name == 'elevated_scene':
             return dist(s[:2], s[5:7]) < .3 and self.box_z_global < .2 and self.bot_z_global > .3
         if self.simulation_name == 'flat_scene':
             return dist(s[:3], s[5:8]) < .3 and abs(self.box_ori_global) < .3  # last part is added
         if self.simulation_name == 'slope_scene':
             return dist(s[:3], s[5:8]) < .3 and abs(self.box_ori_global) < .3  # last part is added
     else:
         return dist(s[5: 8], np.zeros(3)) < .2
Example #5
 def rewardFunction(self, s, a, s_n):
     currDist = dist(s_n, np.zeros(s_n.shape))
     if currDist < .5:
         return (1, 1)
     reg = .1 * np.sum(3 - np.abs(a)) if self.a != "argmax" else 0
     prev = self.prev['S']
     prevOri = unitVector(prev)
     ori = unitVector(s_n)
     r_ori = abs(ori[0]) - abs(prevOri[0])
     deltDist = 10 * (dist(prev, np.zeros(prev.shape)) -
                      dist(s_n, np.zeros(s_n.shape)))
     return ((deltDist + r_ori - reg) / self.success, 0)
 def decide_to_restart(self, s):
     # if far away from box, far away from goal, box dropped, or bot dropped
     if self.simulation_name == 'elevated_scene':
         return dist(s[:3], np.zeros(3)) > 3.5 or dist(
             s[5:8], np.zeros(3)
         ) > 3.5 or self.box_z_global < .2 or self.bot_z_global < .3 or self.currReward <= -20
     if self.simulation_name == 'flat_scene':
         return dist(s[:3], np.zeros(3)) > 3.5 or dist(
             s[5:8], np.zeros(3)) > 4 or abs(
                 self.box_y_global) > 1 or self.currReward <= -20
     if self.simulation_name == 'slope_scene':
         return abs(self.box_ori_global) > .4 or dist(
             s[:3], np.zeros(3)) > 2 or self.currReward <= -20
 def checkPhase(self, s):
     if self.primitive == 'PUSH_IN_HOLE' or self.primitive == 'CROSS':
         if self.rob_height < .35:
             return (-3, 1)
     if self.primitive == 'PUSH_IN_HOLE':
         if (self.box_height < .2):
             d = dist(s[:2], s[4:6])
             print('DISTANCE: ', d)
             if d < .2 and (self.hole_height > self.box_relative_height):
                 return (10 - d * 5, 1)
             else:
                 return (-3, 1)
     if self.primitive == 'CROSS':
         goal = s[4:6]
         d = dist(goal, np.zeros(2))
         if d < .2:
             print('distance: ', d)
             return (5, 1)
     if self.primitive == 'REORIENT':
         box_to_goal = s[4:6] - s[:2]
         goal_vector = unitVector(box_to_goal)
         goal_direction = math.atan(goal_vector[1] / goal_vector[0])
         curr_direction = s[3]
         d = dist(s[:2], s[4:6])
         if abs(self.box_y) > .8:  # .2
             return (-3, 1)
         if abs(goal_direction - curr_direction) < .15 or d < .2:
             return (5 - abs(self.box_y), 1)
     if self.primitive == 'PUSH_TOWARDS':
         d = dist(s[:2], s[4:6])
         box_to_goal = s[4:6] - s[:2]
         goal_vector = unitVector(box_to_goal)
         goal_direction = math.atan(goal_vector[1] / goal_vector[0])
         curr_direction = s[3]
         if abs(self.box_ori) > .25 or abs(self.box_y) > .35:
             return (-2, 1)
         if d < .2:
             return (5, 1)
     if self.primitive == 'SLOPE_PUSH':
         d = dist(s[:2], s[4:6])
         box_to_goal = s[4:6] - s[:2]
         goal_vector = unitVector(box_to_goal)
         goal_direction = math.atan(goal_vector[1] / goal_vector[0])
         curr_direction = s[3]
         if abs(self.box_ori) > .6 or abs(self.box_y) > .5:
             return (-2, 1)
         if d < .2:
             return (5, 1)
     return (0, 0)
Example #8
 def getNextAction(self, state, angle, i):
     '''Order of priority: Box orientation (theta), Contact Point, Agent orientation'''
     theta = angle[3]
     tilt = angle[4]
     to_contact = dist(
         state[:2],
         np.array(
             [self.x_contact[i][self.phase],
              self.y_contact[self.phase][i]]))
     bench = .25 if self.phase == 2 else .1
     if (i != 0 and i != self.num_agents - 1
         ) or self.phase == 0 or self.phase == 1:
         if theta > self.angle_threshold:  #you're ahead
             action = 3
         elif tilt or to_contact > bench:  # far from your contact point
             action = 1
         elif abs(angle[2]) > .2:  # orientation incorrect
             action = 2
         else:
             action = 0
     else:
         if tilt or to_contact > bench:  # far from your contact point
             action = 1
         elif theta > self.angle_threshold:  #you're ahead
             action = 3
         elif abs(angle[2]) > .2:  # orientation incorrect
             action = 2
         else:
             action = 0
     return action
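
The integer actions returned here are decoded elsewhere through self.actionMap (see the checkConditions examples below). The mapping sketched here is only an assumption inferred from the comments in getNextAction and checkConditions, not something the source states explicitly.

# Assumed action mapping, inferred from the comments; the real actionMap may differ.
actionMap = {
    0: "STRAIGHT_BOX",   # default: keep pushing the box straight
    1: "HOME_CONTACT",   # return to the assigned contact point
    2: "CHANGE_ANGLE",   # correct the agent's own orientation
    3: "BACK",           # back off when the agent is ahead of the box
}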
Example #9
 def rewardFunction(self, s_n, a):
     tank, bridge = self.splitState(s_n.ravel())
     prevTank, prevBridge = self.splitState(self.prev['S'].ravel())
     if bridge[2] < .1 or tank[2] < .1:
         # THIS IS A TEST
         return (0, 1)
     if self.phase == 1:
         if bridge[0] > -.5:  # TODO: Check this benchmark for bridge
             print('## Phase 1 Complete ##')
             self.phase += 1
             return (5, 0)
         vel_r = ((tank[0] - prevTank[0]) + (bridge[0] - prevBridge[0])) * 2
         ori_r = -1 * (abs(tank[5]) + abs(bridge[5])) * .08
         r = vel_r + ori_r
     elif self.phase == 2:
         if tank[0] > -.4:  # TODO: Check this benchmark for cross
             print(' ## Phase 2 Complete ##')
             self.phase += 1
             return (5, 0)
         vel_r = (tank[0] - prevTank[0])
         move_r = -1 * dist(prevBridge[:3], bridge[:3])
         r = vel_r + move_r
     elif self.phase == 3:
         if bridge[0] > -.3:  # TODO: Check this benchmark for pull up
             print(' ## Success ##')
             return (10, 1)
         vel_r = (bridge[0] - prevBridge[0])
         r = vel_r
     return (r, 0)
Example #10
 def checkConditions(self, full_state, a):
     # given self.prev['A'] and state (unraveled already), check that we've sufficiently executed primitive
     if a is None:
         return [True for i in range(self.num_agents)]
     fill = []
     states = self.splitState(full_state)
     prev_states = self.splitState(self.prev['S'])
     angles = self.getAngles(full_state)
     box = states["box"]
     prevBox = prev_states['box']
     for i in range(self.num_agents):
         k = "robot" + str(i)
         angle = angles[k]
         theta = angle[3]
         act = self.actionMap[a[i]]
         curr = states[k]
         if act == "STRAIGHT_BOX":
             fill.append(self.timeOut(i))
         if act == "HOME_CONTACT":
             fill.append(
                 self.timeOut(i) or self.fallingBehind(theta) or
                 dist(curr[:2],
                      np.array([-.58, self.y_contact[self.phase][i]])) < .3)
         if act == "CHANGE_ANGLE":
             fill.append(
                 self.timeOut(i) or self.fallingBehind(theta)
                 or abs(box[2] - curr[2]) < .05)
         if act == "BACK":
             fill.append(self.timeOut(i))
     return fill
Example #11
    def checkConditions(self, full_state, a, complete=True):
        # given self.prev['A'] and state (unraveled already), check that we've sufficiently executed primitive
        if a is None:
            return True
        a = self.actionMap[a]
        s = np.array(full_state).ravel()
        goal_angles, align_y_angles, cross_angles, left_angle, right_angle = self.getAngles(
            s)
        theta, phi = goal_angles
        alpha, beta, to_align = align_y_angles
        goal1, goal2 = cross_angles

        if a == "ANGLE_TOWARDS":
            return abs(theta - np.pi / 2) < 5e-2 or self.counter == 0
        if a == "ALIGN_Y":
            return to_align < .1 or self.counter == 0
        if a == "APPROACH":
            return dist(s[:2], np.zeros(2)) < .7 or self.counter == 0
        if a == 'PUSH_IN':
            if self.primitive == 'PUSH_IN_HOLE':
                return self.box_height < .35 or self.counter == 0
            else:
                return self.counter == 0

        return self.counter == 0
Example #12
 def get_neighbors(self, node, radius, inclusions, exclusions):
     neighbors = []
     for node_other in self.nodes:
         if (node.coords, node_other.coords) in exclusions:
             continue
         elif dist(node, node_other) < radius and node_other != node:
             neighbors.append(node_other)
         elif (node.coords, node_other.coords) in inclusions:
             neighbors.append(node_other)
     return neighbors
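
A minimal, hypothetical harness for get_neighbors follows. Node, dist, and the containing Roadmap class are stand-ins for whatever the source actually uses, and inclusions/exclusions are assumed to be plain sets of coordinate pairs.

import numpy as np

class Node:
    def __init__(self, coords):
        self.coords = tuple(coords)

def dist(a, b):
    # Hypothetical helper: distance between two nodes via their coordinates
    return float(np.linalg.norm(np.subtract(a.coords, b.coords)))

class Roadmap:
    def __init__(self, nodes):
        self.nodes = nodes

    def get_neighbors(self, node, radius, inclusions, exclusions):
        # Same logic as the example above
        neighbors = []
        for node_other in self.nodes:
            if (node.coords, node_other.coords) in exclusions:
                continue
            elif dist(node, node_other) < radius and node_other != node:
                neighbors.append(node_other)
            elif (node.coords, node_other.coords) in inclusions:
                neighbors.append(node_other)
        return neighbors

nodes = [Node((0, 0)), Node((0.5, 0)), Node((3, 0))]
graph = Roadmap(nodes)
near = graph.get_neighbors(nodes[0], radius=1.0, inclusions=set(), exclusions=set())
# near == [nodes[1]]: nodes[2] is outside the radius and not force-included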
Example #13
    def checkPhase(self, s):
        if self.primitive == 'PUSH_IN_HOLE' or self.primitive == 'CROSS':
            if self.rob_height < .35:
                return (-3, 1)
        if self.primitive == 'PUSH_IN_HOLE':
            if (self.box_height < .2):
                d = dist(s[:2], s[4:6])
                print('DISTANCE: ', d)
                if d < .2:
                    return (
                        10 - d * 5, 1
                    )  # NOTE: we are training just the first phase (get box into hole)
                else:
                    return (-3, 1)
        if self.primitive == 'CROSS':
            goal = s[4:6]
            d = dist(goal, np.zeros(2))
            if d < .2:
                print('distance: ', d)
                return (5, 1)
        if self.primitive == 'REORIENT':
            box_to_goal = s[4:6] - s[:2]
            goal_vector = unitVector(box_to_goal)
            goal_direction = math.atan(goal_vector[1] / goal_vector[0])
            curr_direction = s[3]

            if abs(self.box_y) > .1:
                return (-3, 1)
            if abs(goal_direction - curr_direction) < .15:
                return (5 - 20 * abs(self.box_y), 1)
        if self.primitive == 'PUSH_TOWARDS':
            d = dist(s[:2], s[4:6])
            box_to_goal = s[4:6] - s[:2]
            goal_vector = unitVector(box_to_goal)
            goal_direction = math.atan(goal_vector[1] / goal_vector[0])
            curr_direction = s[3]
            if abs(self.box_ori) > .25 or abs(self.box_y) > .35:
                return (-2, 1)
            if d < .2:
                return (5, 1)
        return (0, 0)
 def checkConditions(self, full_state, a):
     # given self.prev['A'] and state (unraveled already), check that we've sufficiently executed primitive
     fill = []
     states = self.splitState(full_state)
     prev_states = self.splitState(self.prev['S'])
     angles = self.getAngles(full_state)
     box = states["box"]
     for i in range(self.num_agents):
         if a[0] is None:
             fill.append(True)
             continue
         k = "robot" + str(i)
         angle = angles[k]
         theta = angle[3]
         act = self.actionMap[a[i]]
         curr = states[k]
         if act == "STRAIGHT_BOX":
             fill.append(self.timeOut(i))
         if act == "HOME_CONTACT":
             if self.explicit_control:
                 fill.append(
                     self.timeOut(i) or self.fallingBehind(theta) or dist(
                         curr[:2],
                         np.array([self.x_contact, self.contact[i]])) < .35)
             else:
                 fill.append(
                     self.timeOut(i) or dist(
                         curr[:2],
                         np.array([self.x_contact, self.contact[i]])) < .35)
         if act == "CHANGE_ANGLE":
             if self.explicit_control:
                 fill.append(
                     self.timeOut(i) or self.fallingBehind(theta)
                     or abs(box[2] - curr[2]) < .05)
             else:
                 fill.append(self.timeOut(i) or abs(box[0] - curr[2]) < .05)
         if act == "BACK":
             fill.append(self.timeOut(i))
     return fill
 def isValidAction(self, s, a, angle, i):
     act = self.actionMap[a]
     ori = angle[2]
     if act == "STRAIGHT_BOX":
         return True
     if act == "HOME_CONTACT":
         return not (dist(s[:2], np.array([self.x_contact, self.contact[i]
                                           ])) < .35)
     if act == "CHANGE_ANGLE":
         return not abs(ori) < .05
     if act == "BACK":
         return True
     return False  # unrecognized action types are treated as invalid
 def checkPhase(self, s):
     s = s.ravel()
     theta = s[2]
     phi = s[3]
     z = s[4]
     to_contact = dist(s[:2], np.zeros(2))
     if theta > .45 or to_contact > 1.2:
         print('#### Failed out')
         return (-3, 1)
     if self.box_height >= self.goal_height and self.dist_box_from_goal < 1:
         print('#### SUCCESS')
         return (10, 1)
     return (0, 0)
 def receiveState(self, msg):
     if self.restart_timer:
         self.start_time = time.time()
         self.restart_timer = False
     floats = vrep.simxUnpackFloats(msg.data)
     restart = 0
     r = None
     self.dist_box_from_goal = dist(np.array(floats[0:2]),
                                    np.array(floats[6:8]))
     features = self.feature_joint_2_feature(floats)
     floats = self.feature_joint_2_joint(np.array(floats).ravel())
     s = (np.array(floats)).reshape(1, -1)
     angles = self.getAngles(s)
     states = self.splitState(s)
     self.box_height = states['box'][-1]
     a = (self.sendAction(s))
     if self.mode == 'GET_STATE_DATA':
         if self.changeAction[0]:
             self.curr_rollout1.append(features[0])
         if self.changeAction[1]:
             self.curr_rollout2.append(features[1])
     rest = 0
     for i in range(self.num_agents):
         loc = 'robot' + str(i)
         angle = angles[loc]
         curr_state = self.getNetInput(states[loc], angle, i)
         if type(self.prev['S'][i]) == np.ndarray and type(
                 self.prev['A'][i]) == int:
             prevAngle = self.prevAngles[loc]
             prev_state = self.getNetInput(self.prev['S'][i], prevAngle, i)
             r, restart = self.rewardFunction(curr_state, prev_state, i)
             rest = restart if restart == 1 else rest
             if self.changeAction[i] or rest:
                 r = r if self.isValidAction(self.prev['S'][i],
                                             self.prev['A'][i], angle,
                                             i) or rest else -1
                 self.agent.store(prev_state, self.prev["A"][i],
                                  np.array([r]).reshape(1, -1), curr_state,
                                  a[i], restart)
                 self.currReward += r
         if self.changeAction[i]:
             self.prev["S"][i] = states[loc]
             self.prev["A"][i] = int(a[i])
             self.prevAngles[loc] = angles[loc]
     if any(self.changeAction) and self.trainMode:
         loss = self.agent.train()
     if restart and r > 0 and self.mode == 'GET_STATE_DATA':
         self.curr_rollout1.append(features[0])
         self.curr_rollout2.append(features[1])
     self.restartProtocol(rest, succeeded=r > 0 if r else False)
     return
Example #18
    def getAux(self, pos, prevPos, blockPos, prevBlock, ori, prevOri, phase):
        if phase == 1:
            dist_r = (dist(prevPos, blockPos) - dist(pos, blockPos))

            block_r = blockPos[0] - prevBlock[0]
            prevVec = unitVector(vector(prevOri))
            vec = unitVector(vector(ori))
            goal = unitVector(blockPos[:2] - pos[:2])

            prevDot = dot(prevVec, goal)
            currDot = dot(vec, goal)
            ori_r = currDot - prevDot

            return ((block_r + dist_r + 2 * ori_r) * self.w_phase1 - .01, 0)

        if phase == 2:
            goal = np.array([.80, blockPos[1], pos[2]])
            delta = dist(pos, goal)
            prevDelta = dist(prevPos, goal)
            dist_r = prevDelta - delta
            y_r = -abs(blockPos[1] - pos[1])

            return ((dist_r + .15 * y_r - .05 * abs(ori)) * self.w_phase3 -
                    .01, 0)
    def sendActionForPlan(self, states, phase):
        s = states['feature']
        if dist(s[:3], s[5:8]) < .3:
            msg = Int8()
            msg.data = 1
            self.changePoint.publish(msg)
        action_index = self.box_policy.get_action(
            self.concatenate_identifier(s))

        self.controller.goal = s.ravel()[:2]
        action = self.controller.getPrimitive(
            self.controller.feature_2_task_state(s.ravel()),
            self.action_map[action_index])

        # NOTE: the Int8 `msg` above only exists inside the branch and has no
        # x/y fields; the original presumably builds a separate 2-D action
        # message here before publishing.
        msg.x, msg.y = (action[0], action[1])
        self.pubs[self.name].publish(msg)
        return
 def reward_function(self, s):
     s = s.ravel()
     succeeded = self.succeeded(s)
     done = self.decide_to_restart(s)
     if succeeded:
         if self.simulation_name == 'elevated_scene':
             return 10 - dist(s[:3], s[5:8]) * 5
         if self.simulation_name == 'flat_scene':
             return 10 - abs(self.box_ori_global) * 3
         if self.simulation_name == 'slope_scene':
             return 10 - abs(self.box_ori_global) * 3
     if done and not succeeded:
         return -3
     else:
         if self.prev_action_was_valid:
             return -.25
         else:
             return -.4
Example #21
 def decide_to_restart(self, s):
     assert type(self.simulation_name) == str
     # if far away from box, far away from goal, box dropped, or bot dropped
     # Returns tuple (restart, done)
     # TODO: Get rid of this
     max_steps = 100 if self.simulation_name == 'slope_scene' else 50
     if self.num_steps > max_steps:
         return True, False
     failed = False
     if self.simulation_name == 'elevated_scene':
         failed = dist(s[:3], np.zeros(3)) > 4 or dist(s[5:8], np.zeros(3)) > 5 or self.box_z_global < .2 or self.bot_z_global < .3
     if self.simulation_name == 'flat_scene':
         failed = dist(s[:3], np.zeros(3)) > 5 or dist(s[5:8], np.zeros(3)) > 5 or abs(self.box_y_global) > 2
     if self.simulation_name == 'slope_scene':
         failed = abs(self.box_ori_global) > 1 or dist(s[:3], np.zeros(3)) > 5 or dist(s[:3], s[5:8]) > 10
     return failed, failed
    def checkConditions(self, full_state, a, complete=True):
        # given self.prev['A'] and state (unraveled already), check that we've sufficiently executed primitive
        if a is None:
            return True
        s = np.array(full_state).ravel()
        goal_angles, align_y_angles, cross_angles, left_angle, right_angle = self.getAngles(
            s)
        theta, phi = goal_angles
        alpha, beta, to_align = align_y_angles
        goal1, goal2 = cross_angles

        if a == "ANGLE_TOWARDS":
            return abs(theta - np.pi / 2) < 5e-2
        if a == "ANGLE_TOWARDS_GOAL":
            return abs(goal1 - np.pi / 2) < 5e-2
        if a == "ALIGN_Y":
            return to_align < .1
        if a == "APPROACH":
            return dist(s[:2], np.zeros(2)) < .7
        if a == 'PUSH_IN':
            return False
        return False
 def getNextAction(self, state, angle, i):
     '''Order of priority: Box orientation (theta), Contact Point, Agent orientation'''
     theta = angle[3]
     tilt = angle[4]
     ori = angle[2]
     if self.explicit_control:  #outdated
         to_contact = dist(state[:2],
                           np.array([self.x_contact, self.contact[i]]))
         if theta > self.angle_threshold:  #you're ahead or box is slipping away
             action = 3
         elif to_contact > .5:  # far from your contact point
             action = 1
         elif abs(ori) > .5:  # orientation incorrect
             action = 2
         elif tilt:
             action = 3
         else:
             action = 0  # for now just push
         return action
     else:
         # In the state information: relative position to contact point, theta, orientation angle. Total: 4
         return self.agent.get_action(self.getNetInput(state, angle, i))
Example #24
    def getAngles(self, s):
        states = self.splitState(s)
        box = states['box']
        box_ori = box[2]

        angles = {}
        if self.phase == 1 or self.phase == 3:  # these are transitioning phases
            reached = True
            for i in range(self.num_agents):
                to_contact = dist(
                    states["robot" + str(i)][:2],
                    np.array([
                        self.x_contact[i][self.phase],
                        self.y_contact[self.phase][i]
                    ]))
                if to_contact > .1:
                    reached = False
            if reached:
                self.phase = (self.phase + 1) % 4
                print('PHASE', self.phase)
        for i in range(self.num_agents):
            curr = states["robot" + str(i)]
            ori = curr[2]
            pos = curr[:2]

            # Calculating angle for STRAIGHT_BOX and CHANGE_ANGLE
            phi = ori - box_ori
            direction = -1 if self.y_contact[self.phase][i] > 0 else 1
            # Special case: if you're smack in the middle, you should go back if either side is uneven
            direction = np.sign(box_ori) if self.y_contact[
                self.phase][i] == 0 else direction

            goal_relative_to_box = np.array(
                [self.x_contact[i][self.phase], self.y_contact[self.phase][i]])
            phi_trans = phi - np.pi / 2
            phi_trans = phi_trans + 2 * np.pi if phi_trans < -np.pi else phi_trans
            goal = goal_relative_to_box - pos  # this is the vector from current position to contact point all relative to box
            front_v = vector(phi_trans)
            right_v = vector(phi_trans - np.pi / 2)

            # Calculating angles for HOME_CONTACT
            relative_y = -dot(unitVector(goal),
                              right_v)  # negative for convention
            relative_x = dot(unitVector(goal), front_v)
            buff = (
                -np.pi if relative_y < 0 else np.pi
            ) if relative_x < 0 else 0  # since we want to map -pi to pi
            alpha = np.arctan(relative_y / relative_x) + buff
            beta = -np.pi - alpha if alpha < 0 else np.pi - alpha
            contact_angles = (alpha, beta)

            # Boolean deciding whether a translation adjustment is needed, i.e. if the box is too far in, tilt it towards you
            ratio = abs(
                goal[1] / goal[0]
            )  # ratio of vertical to horizontal distance from the contact point
            tilt = True if ratio > .8 and np.sign(
                goal[1] * self.y_contact[self.phase][i]) < 0 and abs(
                    goal[0]) < .4 else False

            angles['robot' + str(i)] = (alpha, beta, phi, direction * box_ori,
                                        tilt)

        return angles
Example #25
 def calculate_distances_to_neighbors(self):
     for n in self.neighbors:
         self.distances_to_neighbors[n] = dist(n.pos, self.pos)
Example #26
    def getAngles(self, s):
        s = s.ravel()
        goal = self.goal  # This is relative position of box w.r.t. the robot

        if self.primitive == 'PUSH_IN_HOLE' or self.primitive == 'REORIENT' or self.primitive == 'PUSH_TOWARDS':
            relative_y = goal[0]
            relative_x = -goal[1]
        if self.primitive == 'CROSS':
            relative_y = s[4]
            relative_x = -s[5]
        buff = (-np.pi if relative_y < 0 else np.pi
                ) if relative_x < 0 else 0  # since we want to map -pi to pi
        theta = np.arctan(relative_y / relative_x) + buff
        phi = -np.pi - theta if theta < 0 else np.pi - theta
        goal_angles = (theta, phi)

        # NOTE: Depending on the primitive, these all reference the box and some other point past it as well
        box_from_hole = s[:2] - s[4:6]
        hole = s[4:6]
        aligned = hole - dot(
            hole, unitVector(box_from_hole)) * unitVector(box_from_hole)
        relative_x = -aligned[1]
        relative_y = aligned[0]
        buff = (-np.pi if relative_y < 0 else np.pi
                ) if relative_x < 0 else 0  # since we want to map -pi to pi
        alpha = np.arctan(relative_y / relative_x) + buff
        beta = -np.pi - alpha if alpha < 0 else np.pi - alpha
        align_y_angles = (alpha, beta, dist(aligned, np.zeros(2)))

        relative_y = s[4]
        relative_x = -s[5]
        buff = (-np.pi if relative_y < 0 else np.pi
                ) if relative_x < 0 else 0  # since we want to map -pi to pi
        goal1 = np.arctan(relative_y / relative_x) + buff
        goal2 = -np.pi - goal1 if goal1 < 0 else np.pi - goal1
        cross_angles = (goal1, goal2)

        pos = s[:2]
        psi = s[3]
        goal_relative_to_box = np.array([self.x_contact, self.contact['left']])
        rotation_matrix = np.array([[np.cos(psi), -np.sin(psi)],
                                    [np.sin(psi), np.cos(psi)]])
        home = pos + rotation_matrix.dot(goal_relative_to_box)
        relative_y = home[0]
        relative_x = -home[1]
        buff = (-np.pi if relative_y < 0 else np.pi
                ) if relative_x < 0 else 0  # since we want to map -pi to pi
        alpha = np.arctan(relative_y / relative_x) + buff
        beta = -np.pi - alpha if alpha < 0 else np.pi - alpha
        left_angle = (alpha, beta)

        goal_relative_to_box = np.array(
            [self.x_contact, self.contact['right']])
        home = pos + rotation_matrix.dot(goal_relative_to_box)
        relative_y = home[0]
        relative_x = -home[1]
        buff = (-np.pi if relative_y < 0 else np.pi
                ) if relative_x < 0 else 0  # since we want to map -pi to pi
        alpha = np.arctan(relative_y / relative_x) + buff
        beta = -np.pi - alpha if alpha < 0 else np.pi - alpha
        right_angle = (alpha, beta)

        return goal_angles, align_y_angles, cross_angles, left_angle, right_angle
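
The arctan-plus-buff pattern repeated throughout getAngles maps an (x, y) offset to an angle in (-pi, pi]. Assuming relative_x is never exactly zero, it is equivalent to np.arctan2, which also avoids the division; a small check as a sketch:

import numpy as np

def wrapped_angle(relative_y, relative_x):
    # The pattern used repeatedly above, pulled out into a function
    buff = (-np.pi if relative_y < 0 else np.pi) if relative_x < 0 else 0
    return np.arctan(relative_y / relative_x) + buff

# Agrees with np.arctan2 whenever relative_x != 0
for y, x in [(1.0, 2.0), (1.0, -2.0), (-1.0, -2.0), (-1.0, 2.0), (0.0, -1.0)]:
    assert np.isclose(wrapped_angle(y, x), np.arctan2(y, x))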
Example #27
    def receiveState(self, msg): 
        if self.curr_episode[0] >= self.local_curr_episode_synchronous + 1 and self.curr_episode[1] == False:
            print('ENVIRONMENT: ', self.simulation_name)
            self.done = False
            self.start_time = time.time()
            # Train for 20 episodes, test for 20 episodes
            if self.agent.testing_to_record_progress and self.tracker_for_testing % 20 == 0:
                self.agent.testing_to_record_progress = False
                self.tracker_for_testing = 0
            elif not self.agent.testing_to_record_progress and self.tracker_for_testing % 20 == 0 and len(self.agent.policy.exp) >= self.agent.initial_explore:
                self.agent.testing_to_record_progress = True
                self.tracker_for_testing = 0

            self.local_curr_episode_synchronous += 1
            self.tracker_for_testing += 1

            if self.agent.testing_to_record_progress:
                print('  ##### TESTING ##### ')
            elif len(self.agent.policy.exp) < self.agent.initial_explore:
                print('  ##### EXPLORATION ##### ')
            else:
                print('  ##### TRAINING ##### ')

        floats = vrep.simxUnpackFloats(msg.data)
        self.bot_z_global = floats[self.s_n + 1]
        self.box_z_global = floats[self.s_n]
        self.box_y_global = floats[-2]
        self.box_ori_global = floats[-1]
        local_state = np.array(floats[:self.s_n]).ravel()
        
        adjusted_state_for_controls = self.controller.feature_2_task_state(local_state)
        changeAction = self.changeAction(adjusted_state_for_controls, self.action_map[self.prev['A'][0]], complete=False) if type(self.prev['A']) == tuple else True
        s = (np.array(local_state)).reshape(1,-1)

        succeeded = self.succeeded(s.ravel())
        restart, done = self.decide_to_restart(s.ravel())

        self.done = restart or succeeded
        reward = self.reward_function(s)
        if not self.curr_episode[1]:  # Not finished yet with the episode for syncing purposes
            if not self.done:  # If we haven't been declared done
                if changeAction:
                    self.counter = self.period
                    action_index, action_control = (self.sendAction(s, changeAction))
                    self.num_steps += 1
                    if self.isValidAction(adjusted_state_for_controls, self.action_map[action_index]):
                        if len(self.curr_rollout) > 0:
                            if all([dist(r, s.ravel()) > .3 for r in self.curr_rollout[-5:]]):
                                self.curr_rollout.append(s.ravel())
                        else:
                            self.curr_rollout.append(s.ravel())
                        if self.mode == 'GET_STATE_DATA':
                            print('Length data for collection: ', len(self.curr_rollout) + self.curr_size)
                    if type(self.prev["S"]) == np.ndarray and not self.agent.testing_to_record_progress:
                        print(self.action_map[self.prev['A'][0]], reward)
                        self.agent.store(self.prev['S'], np.array(self.prev["A"][0]), reward, s, 0, done or succeeded, self.prev['A'][0])
                    if self.trainMode:
                        loss = self.agent.train(self.curr_episode[0])

                    self.prev["S"] = s
                    self.prev["A"] = (int(action_index), action_control)
                    if not self.curr_episode[1]:
                        self.currReward += reward
                else:
                    action_index, a = self.sendAction(s, changeAction)
            else:        
                if type(self.prev["S"]) == np.ndarray:
                    prev_s = self.prev['S']
                    if not self.agent.testing_to_record_progress:
                        self.agent.store(prev_s, np.array(self.prev["A"][0]), reward, s, 0, done or succeeded, self.prev['A'][0])
                        print('Last transition recorded with reward: ', reward)

                self.currReward += reward   
                if succeeded:
                    assert reward > 0
                    print(' ##### SUCCESS ')
                    print(' ##### Success reward: ', reward)
        
        self.restartProtocol(self.done, succeeded=succeeded)
        return