Example #1
    def reset(self):
        """Prepares the Agent for a new episode.
        """
        log_and_display('Initializing episode')
        self.env.environment_reset()
        self.current_state_id = self.env.get_current_state()
        self.total_explorations = 0
Example #2
    def execute_action(self, action_id):
        """Looks up the action for the given id and dispatches it to the environment:
        either a claw move or a grip engage/disengage.
        """
        action = self.env.actions[action_id]

        if action[0] == self.env.action_type1:
            log_and_display('Action: Moving claw ' + str(action[1]))
            return self.env.move_arm(action[1], action_id)
        elif action[0] == self.env.action_type2:
            log_and_display('Action: Engaging/Disengaging claw ' + str(action[1]))
            return self.env.enable_grip(action[1], action_id)
Example #3
    def update_q_table(self, state, action, reward, state_new):
        """Routing to update q-table.
        """
        q_current = self.q_table[state, action]
        error = reward + self.discount * np.max(self.q_table[state_new]) - q_current
        self.q_table[state, action] = q_current + self.learn_rate * error

        msg = "Q-Value: S:{}, A:{}, R:{}, S`:{}, TE: {}, Q:{}, Q`:{}".format(state, action, reward, state_new, error,
                                                                             q_current, self.q_table[state, action])
        log_and_display(msg)
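This is the standard tabular Q-learning rule, Q(s, a) ← Q(s, a) + α·(r + γ·max Q(s', ·) − Q(s, a)). A minimal, self-contained sketch of the same arithmetic on a toy table (the sizes and constants below are illustrative, not the project's config values):

import numpy as np

# Toy 3-state x 2-action table; learn_rate and discount are illustrative values only.
q_table = np.zeros((3, 2))
learn_rate, discount = 0.1, 0.9

state, action, reward, state_new = 0, 1, 5.0, 2
q_current = q_table[state, action]                                  # 0.0
error = reward + discount * np.max(q_table[state_new]) - q_current  # 5.0
q_table[state, action] = q_current + learn_rate * error             # 0.5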
Example #4
    def select_action(self, current_state_id):
        """Returns an action for the given state, mixing exploratory and
        exploitative choices according to the epsilon value (epsilon-greedy).
        """
        if np.random.uniform() < self.epsilon:
            log_and_display('Exploring...')
            self.total_explorations += 1
            action_id = np.random.choice(self.env.total_actions)
        else:
            log_and_display('Exploiting...')
            action_id = np.argmax(self.q_table[current_state_id])
        return action_id
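Example #7 below logs a config.EPSILON_DECAY constant, which suggests epsilon is reduced between episodes so the agent gradually shifts from exploring to exploiting; the exact schedule is not shown in these snippets. A hedged sketch of one common choice, a multiplicative decay with a floor (min_epsilon is an assumption):

# Hypothetical per-episode decay; config.EPSILON_DECAY exists per Example #7,
# but the multiplicative schedule and the min_epsilon floor are assumptions.
min_epsilon = 0.01
agent.epsilon = max(min_epsilon, agent.epsilon * config.EPSILON_DECAY)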
Example #5
    def pre_populate_qtable(self):
        """Not used, but can be invoked to pre-populate the Q-table with good known values
        """
        log_and_display("Pre-populating the Q-table with some known values - this helps it converge faster")
        for index, val in enumerate(self.q_table):
            if not self.env.states[index][3]:  # If object not held
                self.q_table[index][0] += config.REWARD_FIRST_SUCCESS  # Encourage grip enable
                self.q_table[index][1] -= config.REWARD_BAD_STEP
                if self.env.states[index][6] > 0.03:
                    for act in range(self.env.total_actions):
                        if self.env.actions[act][0] == self.env.action_type1 and self.env.actions[act][1][2] > 0.03:
                            self.q_table[index][act] = -100
            else:
                self.q_table[index][1] += config.REWARD_FIRST_SUCCESS
                self.q_table[index][0] -= config.REWARD_BAD_STEP
                if self.env.states[index][6] <= 0.07:
                    for act in range(self.env.total_actions):
                        if self.env.actions[act][0] == self.env.action_type1 and self.env.actions[act][1][2] <= 0.07:
                            self.q_table[index][act] = -100
        log_and_display("Done")
Example #6
    def __get_canonical_state(self):
        """Fetches position of the arm, the object and state of the gripper, calculates the state id that
        their values correspond to, and returns the state id
        """
        pos_obj = self.robot.get_position(self.robot.cylinder_handle)
        pos_arm = self.robot.get_position(self.robot.gripper_handle)
        object_held = self.robot.is_object_held()

        current_state_id = 0
        for state in self.states:
            if abs(state[0] - pos_obj[0]) < self.tolerance \
                    and abs(state[1] - pos_obj[1]) < self.tolerance \
                    and abs(state[2] - pos_obj[2]) < self.tolerance \
                    and state[3] == object_held \
                    and abs(state[4] - pos_arm[0]) < self.unit_step \
                    and abs(state[5] - pos_arm[1]) < self.unit_step \
                    and abs(state[6] - pos_arm[2]) < self.unit_step:
                return current_state_id
            current_state_id += 1

        log_and_display('State was invalid: ' + str(pos_obj) +
                        str(object_held) + str(pos_arm))
        return self.invalid_states_index
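The lookup above scans every discretized state on each call. If the states were also kept as a NumPy array (an assumption; Example #8 builds them as a Python list), the same match could be vectorized, for example:

import numpy as np

# Hypothetical vectorized lookup; assumes states_arr has shape (N, 7) with rows
# [obj_x, obj_y, obj_z, held, arm_x, arm_y, arm_z] and that the invalid state
# is excluded from the array.
def find_state_id(states_arr, pos_obj, object_held, pos_arm, tolerance, unit_step, invalid_index):
    mask = (
        (np.abs(states_arr[:, 0:3] - np.asarray(pos_obj)).max(axis=1) < tolerance)
        & (states_arr[:, 3] == float(object_held))
        & (np.abs(states_arr[:, 4:7] - np.asarray(pos_arm)).max(axis=1) < unit_step)
    )
    matches = np.nonzero(mask)[0]
    return int(matches[0]) if matches.size else invalid_index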
Example #7
if not os.path.exists(config.Q_TABLE_DIR):
    os.makedirs(config.Q_TABLE_DIR)

vrep_ip = '127.0.0.1'
vrep_port = 19997

env = Environment(vrep_ip, vrep_port)

agent = Agent(env, epsilon=config.EPSILON, q_init_val=config.Q_INIT_VAL,
              discount=config.DISCOUNT, learn_rate=config.LEARN_RATE)

episodes = config.NUM_EPISODES
agent.load_qtable()

log_and_display("%%%%%%%%%%%%%%%%%%%%%%%%% Main starts %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
log_and_display("Epsilon: " + str(config.EPSILON))
log_and_display("Epsilon Decay: " + str(config.EPSILON_DECAY))
log_and_display("Q Init: " + str(config.Q_INIT_VAL))
log_and_display("Discount: " + str(config.DISCOUNT))
log_and_display("Learning Rate: " + str(config.LEARN_RATE))
log_and_display("Max Episodes: " + str(config.NUM_EPISODES))
log_and_display("Max Actions/Episodes: " + str(config.NUM_MAX_ACTIONS))
log_and_display("Env Dimensions: " + str(config.ENV_DIMENSION))

if train_mode:
    # Here we train the agent and generate the qtable.
    # qtable will be dumped in qtables/qtable.txt.npy
    while episodes > 0:
        log_and_display('=============================================> Episode ' + str(episodes))
        agent.reset()
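The episode body is cut off here. Purely as an illustration of how the pieces shown in the other examples could fit together (the variable names, the step budget handling and the placement of calculate_reward are assumptions, not the author's actual loop), the inner loop might look like:

        # Hypothetical inner loop assembled from Examples #2-#4 and #9; not the original code.
        state = agent.current_state_id
        for _ in range(config.NUM_MAX_ACTIONS):
            action = agent.select_action(state)
            agent.execute_action(action)
            reward, done, success = calculate_reward(env)
            state_new = env.get_current_state()
            agent.update_q_table(state, action, reward, state_new)
            state = state_new
            if done:
                break
        episodes -= 1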
Example #8
    def __init__(self, vrep_ip: str, vrep_port: int):
        """Prepares the actions, states and other environment variables
        """
        self.robot = RobotArm(vrep_ip, vrep_port)

        self.tolerance = utility.rnd(config.TOLERANCE)
        self.unit_step = utility.rnd(config.UNIT_STEP_SIZE)

        dim = self.robot.get_env_dimensions()

        # Actions #########################################################
        # The actions the agent can take - either go to some (x, y, z) position
        # or engage/disengage the claw

        x_range_actions = np.arange(dim[0][0], dim[0][1], self.unit_step)
        y_range_actions = np.arange(dim[1][0], dim[1][1], self.unit_step)
        z_range_actions = np.arange(dim[2][0], dim[2][1], self.unit_step)

        # Actions consist of
        #   a) Gripper Enable/Disable
        #   b) Goto location (x, y, z)
        self.action_type1 = 'move_gripper'
        self.action_type2 = 'engage_gripper'
        self.actions = []
        self.actions.append([self.action_type2, True])
        self.actions.append([self.action_type2, False])

        print(x_range_actions[1:-1])
        print(y_range_actions[1:-1])
        print(z_range_actions[1:-1])

        for x in x_range_actions[1:-1]:
            for y in y_range_actions[1:-1]:
                for z in z_range_actions[1:-1]:
                    self.actions.append([self.action_type1, [x, y, z]])

        self.total_actions = len(self.actions)

        # States #########################################################
        # States consist of
        #   a) Position of the object (x, y, z coordinates)
        #   b) If it is held by gripper or not
        #   c) Position of the gripper (x, y, z coordinates)
        x_range = np.arange(dim[0][0], dim[0][1], self.tolerance)
        y_range = np.arange(dim[1][0], dim[1][1], self.tolerance)
        z_range = np.arange(dim[2][0], dim[2][1], self.tolerance)

        self.states = []
        self.invalid_state = config.INVALID_STATE
        for x in x_range:
            for y in y_range:
                for z in z_range:
                    for b in [True, False]:
                        for xa in x_range_actions:
                            for ya in y_range_actions:
                                for za in z_range_actions:
                                    self.states.append(
                                        [x, y, z, b, xa, ya, za])

        # invalid state, the last state. This state suggests that the object is outside the environment.
        self.states.append(self.invalid_state)
        self.total_states = len(self.states)
        self.invalid_states_index = self.total_states - 1

        log_and_display("There are {0} actions.".format(self.total_actions))
        log_and_display("There are {0} states.".format(self.total_states))

        self.episode_object_gripped = False
        self.environment_breached = False
        self.is_success = False
        self.actionstate_prev = {}
        self.actionstate_curr = {}
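The action and state counts follow directly from the loops above: two gripper actions plus one move action per interior grid point, and one state for every combination of object cell, held flag and gripper cell, plus the single invalid state. A quick check with illustrative grid sizes (the real counts depend on config.ENV_DIMENSION, config.TOLERANCE and config.UNIT_STEP_SIZE):

# Illustrative grid sizes only; the real lengths come from the environment dimensions.
nx_a, ny_a, nz_a = 6, 6, 4      # len(x_range_actions), len(y_range_actions), len(z_range_actions)
nx_s, ny_s, nz_s = 12, 12, 8    # len(x_range), len(y_range), len(z_range) on the tolerance grid

total_actions = 2 + (nx_a - 2) * (ny_a - 2) * (nz_a - 2)             # 2 grip actions + interior moves = 34
total_states = (nx_s * ny_s * nz_s) * 2 * (nx_a * ny_a * nz_a) + 1   # object x held x gripper + invalid = 331777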
Example #9
def calculate_reward(env):
    """Implements the reward strategy, returns reward, environment_breached, is_success
    """

    if not is_valid_state(env):
        log_and_display('Penalty: Reached invalid state, terminating')
        return config.REWARD_TERMINATION, True, False

    if not is_cylinder_standing(env):
        log_and_display('Penalty: Cylinder has fallen, terminating')
        return config.REWARD_TERMINATION, True, False

    if not is_bin_inplace(env):
        log_and_display('Penalty: Bin has shifted, terminating')
        return config.REWARD_TERMINATION, True, False

    if is_grip_engaged_with_no_object(env):
        log_and_display('Penalty: Claw is engaged but cylinder is not in claw')
        return config.REWARD_BAD_STEP, False, False

    if is_cylinder_not_dropped_in_bin(env):
        log_and_display('Penalty: Claw did not drop the cylinder in the bin')
        return config.REWARD_BAD_STEP, False, False

    if is_grip_holding_object(env):
        log_and_display('Reward: Claw could grab the cylinder for the first time')
        return config.REWARD_FIRST_SUCCESS, False, False

    if is_object_in_bin(env):
        log_and_display('Reward: Cylinder in bucket. Objective achieved !!!!!!!!')
        return config.REWARD_GOAL_ACHIEVED, True, True

    return config.REWARD_DEFAULT, False, False  # Default