Example #1
    def tick(self):
        cond_active = False
        for c in self.conds:
            if isinstance(c, Timer):
                # if it is a timer entry, just update the timer and check if it
                # has expired
                c.tick()
                if c.ring():
                    self.reward = self.rewardval
                    self.activate()
                    c.reset()
                    cond_active = True

            elif (self.env.is_in(self.env.state, c) and
                  (self.conds[c] is None or
                   HRLutils.similarity(HRLutils.normalize(self.context),
                                       self.conds[c]) > 0.3)):
                # if it is a state entry, check if the agent is in the region
                # associated with that state, and check if that region is the
                # one corresponding to the currently selected context

                self.reward = self.rewardval

                self.rewardamount += 1
                if self.rewardamount > self.rewardresetamount:
                    self.activate()
                    self.rewardamount = 0

                cond_active = True

        # if no termination conditions met, just give default reward
        if not cond_active:
            self.reward = self.defaultreward

        # reset rewardamount when the reset signal is sent (so that there won't
        # be any leftover rewardamount from the agent's previous decision)
        if self.t > self.resettime[0] and self.t < self.resettime[1]:
            self.rewardamount = 0

        # add a penalty if the state hasn't changed (to help prevent agent from
        # getting stuck)
        if sum(self.prev_state) != 0 and \
                HRLutils.similarity(HRLutils.normalize(self.env.state),
                                    HRLutils.normalize(self.prev_state)) < 1.0:
            self.state_penalty = 0.0
        else:
            self.state_penalty += 0.0001
        self.prev_state = copy.deepcopy(self.env.state)

        self.reward = self.reward - self.state_penalty
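
The tick() above drives each Timer entry through tick()/ring()/reset(). The Timer class itself does not appear on this page; the following is only a minimal sketch of the interface that usage implies (the period and dt constructor arguments are assumed names), not the project's actual implementation.

    class Timer(object):
        # Minimal sketch (assumed): counts elapsed time in dt-sized steps and
        # "rings" once the configured period has passed.
        def __init__(self, period, dt=0.001):
            self.period = period
            self.dt = dt
            self.elapsed = 0.0

        def tick(self):
            # advance the timer by one timestep
            self.elapsed += self.dt

        def ring(self):
            # True once the timer has expired
            return self.elapsed >= self.period

        def reset(self):
            # restart the countdown
            self.elapsed = 0.0
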
Example #2
    def tick(self):
        cond_active = False
        for c in self.conds:
            if isinstance(c, Timer):
                # if it is a timer entry, just update the timer and check if it
                # has expired
                c.tick()
                if c.ring():
                    self.reward = self.rewardval
                    self.activate()
                    c.reset()
                    cond_active = True

            elif (self.env.is_in(self.env.state, c)
                  and (self.conds[c] is None or HRLutils.similarity(
                      HRLutils.normalize(self.context), self.conds[c]) > 0.3)):
                # if it is a state entry, check if the agent is in the region
                # associated with that state, and check if that region is the
                # one corresponding to the currently selected context

                self.reward = self.rewardval

                self.rewardamount += 1
                if self.rewardamount > self.rewardresetamount:
                    self.activate()
                    self.rewardamount = 0

                cond_active = True

        # if no termination conditions met, just give default reward
        if not cond_active:
            self.reward = self.defaultreward

        # reset rewardamount when the reset signal is sent (so that there won't
        # be any leftover rewardamount from the agent's previous decision)
        if self.t > self.resettime[0] and self.t < self.resettime[1]:
            self.rewardamount = 0

        # add a penalty if the state hasn't changed (to help prevent agent from
        # getting stuck)
        if sum(self.prev_state) != 0 and \
                HRLutils.similarity(HRLutils.normalize(self.env.state),
                                    HRLutils.normalize(self.prev_state)) < 1.0:
            self.state_penalty = 0.0
        else:
            self.state_penalty += 0.0001
        self.prev_state = copy.deepcopy(self.env.state)

        self.reward = self.reward - self.state_penalty
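
Both tick() examples also rely on HRLutils.normalize and HRLutils.similarity, which are defined elsewhere in the project. Given that normalized vectors are compared against a 0.3 threshold, a plausible reading is unit-length normalization followed by a dot product (cosine similarity for unit vectors); the sketch below reflects that assumption and may differ from the real helpers.

    import math

    def normalize(vec):
        # scale the vector to unit length (returned unchanged if all zeros)
        length = math.sqrt(sum(v * v for v in vec))
        return [v / length for v in vec] if length > 0 else list(vec)

    def similarity(a, b):
        # dot product; equals cosine similarity when both inputs are unit length
        return sum(x * y for x, y in zip(a, b))
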
Example #3
    def calc_optimal_move(self):
        """Calculate the optimal move for the agent to take in the current
        state/context."""

        # basically the same as PlaceCellEnvironment.calc_optimal_move, except
        # we look at the current context to find the goal

        goal = [c for c in self.contexts
                if self.contexts[c] == self.context][0]

        stepsize = 0.1
        self.optimal_move = None
        for y in [
                v * stepsize for v in range(
                    int(-self.imgsize[1] / (2 * stepsize)) + 1,
                    int(self.imgsize[1] / (2 * stepsize)) - 1)
        ]:
            for x in [
                    v * stepsize for v in range(
                        int(-self.imgsize[0] / (2 * stepsize)) + 1,
                        int(self.imgsize[0] / (2 * stepsize)) - 1)
            ]:
                if self.is_in((x, y), goal):
                    angle = math.atan2(y - self.state[1], x - self.state[0])
                    pt = (math.cos(angle), math.sin(angle))
                    self.optimal_move = max(
                        self.actions,
                        key=lambda x: -1 if self.is_in(
                            (x[1][0] * self.dx + self.state[0],
                             x[1][1] * self.dx + self.state[1]),
                            "wall") else HRLutils.similarity(x[1], pt))[0]
                    return
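
The goal lookup at the top of this method assumes self.contexts maps region names to context vectors, so the list comprehension is a reverse lookup from the currently selected context back to its region name. A throwaway illustration with made-up values:

    contexts = {"a": (1, 0), "b": (0, 1)}  # assumed mapping: region -> context
    context = (0, 1)                       # currently selected context
    goal = [c for c in contexts if contexts[c] == context][0]  # -> "b"
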
Example #4
    def calc_optimal_move(self):
        """Calculate the optimal move for the agent to take in the current
        state/context."""

        # basically the same as PlaceCellEnvironment.calc_optimal_move, except
        # we look at whether or not we have the package to pick a goal state

        stepsize = 0.1
        self.optimal_move = None
        for y in [v * stepsize for v in
                  range(int(-self.imgsize[1] / (2 * stepsize)) + 1,
                        int(self.imgsize[1] / (2 * stepsize)) - 1)]:
            for x in [v * stepsize for v in
                      range(int(-self.imgsize[0] / (2 * stepsize)) + 1,
                            int(self.imgsize[0] / (2 * stepsize)) - 1)]:
                if ((self.is_in((x, y), "a") and not self.in_hand) or
                        (self.is_in((x, y), "b") and self.in_hand)):
                    angle = math.atan2(y - self.state[1], x - self.state[0])
                    pt = (math.cos(angle), math.sin(angle))
                    self.optimal_move = max(
                        self.actions, key=lambda x: -1
                        if self.is_in((x[1][0] * self.dx + self.state[0],
                                       x[1][1] * self.dx + self.state[1]),
                                      "wall")
                        else HRLutils.similarity(x[1], pt))[0]

                    return
Example #5
    def calc_optimal_move(self):
        """Calculate the optimal move for the agent to take in the current
        state/context."""

        # basically the same as PlaceCellEnvironment.calc_optimal_move, except
        # we look at the current context to find the goal

        goal = [c for c in self.contexts
                if self.contexts[c] == self.context][0]

        stepsize = 0.1
        self.optimal_move = None
        for y in [v * stepsize for v in range(int(-self.imgsize[1] /
                                                  (2 * stepsize)) + 1,
                                              int(self.imgsize[1] /
                                                  (2 * stepsize)) - 1)]:
            for x in [v * stepsize for v in range(int(-self.imgsize[0] /
                                                      (2 * stepsize)) + 1,
                                                  int(self.imgsize[0] /
                                                      (2 * stepsize)) - 1)]:
                if self.is_in((x, y), goal):
                    angle = math.atan2(y - self.state[1], x - self.state[0])
                    pt = (math.cos(angle), math.sin(angle))
                    self.optimal_move = max(
                        self.actions, key=lambda x: -1 if
                        self.is_in((x[1][0] * self.dx + self.state[0],
                                    x[1][1] * self.dx + self.state[1]),
                                   "wall")
                        else HRLutils.similarity(x[1], pt))[0]
                    return
Example #6
    def calc_optimal_move(self):
        """Calculates the optimal move for the agent to make in the current state.

        Used for debugging mainly.
        """

        # grid search the image with the given stepsize
        stepsize = 0.1
        self.optimal_move = None
        for y in [v * stepsize for v in
                  range(int(-self.imgsize[1] / (2 * stepsize)) + 1,
                        int(self.imgsize[1] / (2 * stepsize)) - 1)]:
            for x in [v * stepsize for v in
                      range(int(-self.imgsize[0] / (2 * stepsize)) + 1,
                            int(self.imgsize[0] / (2 * stepsize)) - 1)]:
                # if the pt you're looking at is in the region you're looking for
                if self.is_in((x, y), "target"):
                    # generate a target point in the direction from current
                    # location to target
                    angle = math.atan2(y - self.state[1], x - self.state[0])
                    pt = (math.cos(angle), math.sin(angle))

                    # pick the action that is closest to the target point
                    # note: penalize actions that would involve moving through a wall
                    self.optimal_move = max(
                        self.actions, key=lambda x: -1
                        if self.is_in((x[1][0] * self.dx + self.state[0],
                                       x[1][1] * self.dx + self.state[1]),
                                      "wall")
                        else HRLutils.similarity(x[1], pt))[0]
                    return
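
All of the calc_optimal_move variants on this page share the same pattern: grid-search the image for a point inside the goal region, turn the direction from the current state to that point into a unit vector, then pick the action whose movement vector is most similar to it, scoring any action that would step into a wall at -1. The standalone sketch below isolates that action-selection step; pick_move, the actions list, and the is_in predicate are stand-in names, not part of the project.

    def pick_move(state, goal_dir, actions, dx, is_in):
        # actions is assumed to be a list of (name, (x_component, y_component))
        # pairs; goal_dir is a unit vector pointing toward the goal.
        def score(action):
            _, direction = action
            nxt = (direction[0] * dx + state[0], direction[1] * dx + state[1])
            if is_in(nxt, "wall"):
                return -1  # never prefer stepping into a wall
            return direction[0] * goal_dir[0] + direction[1] * goal_dir[1]
        return max(actions, key=score)[0]

    # e.g. with four compass actions and no walls nearby:
    #   pick_move((0.0, 0.0), (1.0, 0.0),
    #             [("up", (0, 1)), ("down", (0, -1)),
    #              ("left", (-1, 0)), ("right", (1, 0))],
    #             0.1, lambda pt, region: False)  -> "right"
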
Example #7
    def calc_optimal_move(self):
        """Calculate the optimal move for the agent to take in the current
        state/context."""

        # basically the same as PlaceCellEnvironment.calc_optimal_move, except
        # we look at whether or not we have the package to pick a goal state

        stepsize = 0.1
        self.optimal_move = None
        for y in [
                v * stepsize for v in range(
                    int(-self.imgsize[1] / (2 * stepsize)) + 1,
                    int(self.imgsize[1] / (2 * stepsize)) - 1)
        ]:
            for x in [
                    v * stepsize for v in range(
                        int(-self.imgsize[0] / (2 * stepsize)) + 1,
                        int(self.imgsize[0] / (2 * stepsize)) - 1)
            ]:
                if ((self.is_in((x, y), "a") and not self.in_hand)
                        or (self.is_in((x, y), "b") and self.in_hand)):
                    angle = math.atan2(y - self.state[1], x - self.state[0])
                    pt = (math.cos(angle), math.sin(angle))
                    self.optimal_move = max(
                        self.actions,
                        key=lambda x: -1 if self.is_in(
                            (x[1][0] * self.dx + self.state[0],
                             x[1][1] * self.dx + self.state[1]),
                            "wall") else HRLutils.similarity(x[1], pt))[0]

                    return
Example #8
    def tick(self):
        # check if env is currently giving reward (we want to give
        # pseudoreward at the same time)
        if self.env.reward != 0:
            if self.target_answer is None:
                self.reward = 0
            else:
                # check if the selected action matches the correct action
                self.reward = (self.rewardval
                               if HRLutils.similarity(self.target_answer,
                                                      self.action) > 0.5
                               else -self.rewardval)
        else:
            self.reward = 0

            # update the target_answer (the action the low level should be
            # selecting given the current context)
            if self.context[0] == "orientation":
                self.target_answer = self.env.state[:self.env.num_orientations]
            elif self.context[0] == "shape":
                self.target_answer = self.env.state[
                    self.env.num_orientations:-self.env.num_colours]
            else:
                self.target_answer = None
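
The slicing in the branches above assumes the environment state vector is a concatenation of segments in the order [orientation | shape | colour], so state[:num_orientations] is the orientation block and state[num_orientations:-num_colours] is the shape block. A quick illustration with assumed sizes:

    num_orientations, num_shapes, num_colours = 4, 3, 2
    state = list(range(num_orientations + num_shapes + num_colours))
    orientation_part = state[:num_orientations]         # [0, 1, 2, 3]
    shape_part = state[num_orientations:-num_colours]   # [4, 5, 6]
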
Example #9
    def calc_optimal_move(self):
        """Calculates the optimal move for the agent to make in the current
        state.

        Used for debugging.
        """

        # grid search the image with the given stepsize
        stepsize = 0.1
        self.optimal_move = None
        for y in [
                v * stepsize for v in range(
                    int(-self.imgsize[1] / (2 * stepsize)) + 1,
                    int(self.imgsize[1] / (2 * stepsize)) - 1)
        ]:
            for x in [
                    v * stepsize for v in range(
                        int(-self.imgsize[0] / (2 * stepsize)) + 1,
                        int(self.imgsize[0] / (2 * stepsize)) - 1)
            ]:
                # if the pt you're looking at is in the region you're
                # looking for
                if self.is_in((x, y), "target"):
                    # generate a target point in the direction from current
                    # location to target
                    angle = math.atan2(y - self.state[1], x - self.state[0])
                    pt = (math.cos(angle), math.sin(angle))

                    # pick the action that is closest to the target point
                    # note: penalize actions that would involve moving through
                    # a wall
                    self.optimal_move = max(
                        self.actions,
                        key=lambda x: -1 if self.is_in(
                            (x[1][0] * self.dx + self.state[0],
                             x[1][1] * self.dx + self.state[1]),
                            "wall") else HRLutils.similarity(x[1], pt))[0]
                    return