Example #1
0
    def _transition_func(self, state, action):
        '''
        Apply a Towers-of-Hanoi move to @state.

        Args:
            state (State): one string of disc letters per peg (" " = empty).
            action (str): two digits "<source><dest>" naming the peg indices.

        Returns
            (State): resulting state; data unchanged if the move is illegal.
        '''

        # Grab top discs on source and dest pegs (last char of each string).
        source_index = int(action[0])
        dest_index = int(action[1])
        source_top = state[source_index][-1]
        dest_top = state[dest_index][-1]

        # Make new state. The move is legal only when the destination's top
        # disc precedes the moving disc alphabetically (" " marks an empty
        # peg and compares smaller than every letter).
        new_state_ls = state.get_data()[:]
        if dest_top < source_top:
            new_state_ls[source_index] = new_state_ls[source_index][:-1]
            if new_state_ls[source_index] == "":
                new_state_ls[source_index] = " "
            new_state_ls[dest_index] += source_top
            new_state_ls[dest_index] = new_state_ls[dest_index].replace(
                " ", "")
        new_state = State(new_state_ls)

        # Set terminal. Bug fix: check the *resulting* state. The original
        # tested the pre-transition @state (its own commented-out check
        # referenced new_state), so the goal state was never marked terminal
        # when first reached -- only one transition later.
        if self._is_goal_state(new_state):
            new_state.set_terminal(True)

        return new_state
Example #2
0
 def __init__(self, belief_distribution):
     '''
     Args:
         belief_distribution (defaultdict): maps outcomes to probabilities.
     '''
     self.distribution = belief_distribution
     # The state's feature data is the concrete list of probabilities.
     probabilities = list(belief_distribution.values())
     State.__init__(self, data=probabilities)
 def __init__(self, x, x_dot, theta, theta_dot):
     # Discretize each component by rounding: cart position/velocities to
     # 1 decimal place, the pole angle to 3.
     self.x, self.x_dot = round(x, 1), round(x_dot, 1)
     self.theta, self.theta_dot = round(theta, 3), round(theta_dot, 1)
     State.__init__(
         self, data=[self.x, self.x_dot, self.theta, self.theta_dot])
    def __init__(self, position, velocity, done):
        # Keep the raw components; the flattened concatenation of position
        # and velocity forms the state's feature vector.
        self.position = position
        self.velocity = velocity

        features = np.concatenate((position, velocity), axis=0)
        State.__init__(self, features, is_terminal=done)
Example #5
0
 def __init__(self, belief_distribution):
     '''
     Args:
         belief_distribution (defaultdict): maps outcomes to probabilities.
     '''
     self.distribution = belief_distribution
     # Bug fix: materialize the dict view. Under Python 3, .values() is a
     # lazy view that is not indexable and silently tracks later mutations
     # of the distribution; the State's data should be a concrete list.
     State.__init__(self, data=list(belief_distribution.values()))
Example #6
0
    def _transition_func(self, state, action):
        '''
        Apply a Towers-of-Hanoi move to @state.

        Args:
            state (State): one string of disc letters per peg (" " = empty).
            action (str): two digits "<source><dest>" naming the peg indices.

        Returns
            (State): resulting state; data unchanged if the move is illegal.
        '''

        # Grab top discs on source and dest pegs (last char of each string).
        source_index = int(action[0])
        dest_index = int(action[1])
        source_top = state[source_index][-1]
        dest_top = state[dest_index][-1]

        # Make new state. The move is legal only when the destination's top
        # disc precedes the moving disc alphabetically (" " marks an empty
        # peg and compares smaller than every letter).
        new_state_ls = state.get_data()[:]
        if dest_top < source_top:
            new_state_ls[source_index] = new_state_ls[source_index][:-1]
            if new_state_ls[source_index] == "":
                new_state_ls[source_index] = " "
            new_state_ls[dest_index] += source_top
            new_state_ls[dest_index] = new_state_ls[dest_index].replace(" ", "")
        new_state = State(new_state_ls)

        # Set terminal. Bug fix: check the *resulting* state. The original
        # tested the pre-transition @state (its own commented-out check
        # referenced new_state), so the goal state was never marked terminal
        # when first reached -- only one transition later.
        if self._is_goal_state(new_state):
            new_state.set_terminal(True)

        return new_state
Example #7
0
 def __init__(self, x, x_dot, theta, theta_dot):
     # Round each component to discretize the continuous state: positions
     # and velocities to 1 decimal, the pole angle to 3.
     self.x = round(x, 1)
     self.x_dot = round(x_dot, 1)
     self.theta = round(theta, 3)
     self.theta_dot = round(theta_dot, 1)
     features = [self.x, self.x_dot, self.theta, self.theta_dot]
     State.__init__(self, data=features)
    def __init__(self, observation, is_terminal=False):
        '''
        Args:
            observation (dict): expects "position", "velocity" and
                "to_target" entries (array-likes), flattened together into
                the state's feature vector.
            is_terminal (bool)
        '''
        self.position = observation["position"]
        self.velocity = observation["velocity"]
        self.to_target = observation["to_target"]

        features = np.concatenate(
            (self.position, self.velocity, self.to_target), axis=0)
        State.__init__(self, data=features, is_terminal=is_terminal)
Example #9
0
    def __init__(self, x, y, x_dot, y_dot, on_platform_type, is_terminal=False):
        # Planar position/velocity plus which platform type (if any) the
        # agent is currently on.
        self.x = x
        self.y = y
        self.x_dot = x_dot
        self.y_dot = y_dot
        self.on_platform_type = on_platform_type

        features = [self.x, self.y, self.x_dot, self.y_dot,
                    self.on_platform_type]
        State.__init__(self, data=features, is_terminal=is_terminal)
    def __init__(self, x, y, xdot, ydot, is_terminal=False):
        # Planar position and velocity; the feature vector is a numpy array.
        self.x, self.y = x, y
        self.xdot, self.ydot = xdot, ydot

        State.__init__(self,
                       data=np.asarray([x, y, xdot, ydot]),
                       is_terminal=is_terminal)
Example #11
0
    def __init__(self, objects):
        '''
        Build an object-oriented MDP state from its object map.

        Args:
            objects (dict of OOMDPObject instances): {key=object class (str):val = object instances}
        '''
        # NOTE(review): update() is defined elsewhere; it presumably derives
        # self.data from self.objects, so it must run before State.__init__
        # consumes self.data -- confirm before reordering these lines.
        self.objects = objects
        self.update()

        State.__init__(self, data=self.data)
Example #12
0
    def __init__(self, objects):
        '''
        Build an object-oriented MDP state from its object map.

        Args:
            objects (dict of OOMDPObject instances): {key=object class (str):val = object instances}
        '''
        # NOTE(review): update() is defined elsewhere; it presumably derives
        # self.data from self.objects, so it must run before State.__init__
        # consumes self.data -- confirm before reordering these lines.
        self.objects = objects
        self.update()

        State.__init__(self, data=self.data)
 def __init__(self, location, photo_block=None):
     """
     :param location: A tuple, the coordinate (x,y,z) of drone
     :param photo_block: A DroneBlock
     """
     # Expose the individual coordinates; the raw tuple and block form the
     # state's feature data.
     self.x, self.y, self.z = location[0], location[1], location[2]
     self.photo_block = photo_block
     State.__init__(self, data=[location, photo_block])
 def __init__(self, xr, yr, u, r, d, l, xg, yg):
     # The state data keeps the raw (unrounded) features; the attributes
     # below round the continuous coordinates to 5 decimal places.
     State.__init__(self, data=[xr, yr, u, r, d, l, xg, yg])
     self.xr, self.yr = round(xr, 5), round(yr, 5)
     self.u, self.r = u, r
     self.d, self.l = d, l
     self.xg, self.yg = round(xg, 5), round(yg, 5)
Example #15
0
 def __init__(self, x, y, color):
     '''
     Args:
         x (int)
         y (int)
         color (int)
     '''
     # Raw values go into the state data; the coordinate attributes are
     # rounded to 3 decimal places.
     State.__init__(self, data=[x, y, color])
     self.x, self.y = round(x, 3), round(y, 3)
     self.color = color
Example #16
0
    def __init__(self, board):
        '''
        Initialiser taking the current 2048 board.

        Parameters
        ----------
        board : nparray
            the numpy array representing the 2048 grid.
        '''
        # The flattened cell values are the state's feature data; the 2-D
        # board is kept for convenience.
        flat_cells = board.flatten().tolist()
        State.__init__(self, data=flat_cells)
        self.board = board
    def __init__(self, robot, doors, rooms, blocks):
        '''
        Args:
            robot (CleanupL1Robot)
            doors (list): list of all the CleanupL1Door objects
            rooms (list): list of all the CleanupL1Room objects
            blocks (list): list of all the CleanupL1Block objects
        '''
        self.robot, self.doors = robot, doors
        self.rooms, self.blocks = rooms, blocks

        State.__init__(
            self, data=[self.robot, self.doors, self.rooms, self.blocks])
    def get_init_state(self):
        '''
        Returns:
            (State): [x, y] of the initial location followed by each
                initial rock's features cast to int.
        '''
        features = [self.init_loc[0], self.init_loc[1]]
        for rock in self.init_rocks:
            # Idiom fix: the original built a list and then wrapped it in a
            # redundant list() call; extend with the int-cast features
            # directly.
            features.extend(int(f) for f in rock)

        return State(data=features)
Example #19
0
 def __init__(self, task, x, y, blocks=None, doors=None, rooms=None):
     '''
     :param task: The given CleanUpTask
     :param x: Agent x coordinate
     :param y: Agent y coordinate
     :param blocks: List of blocks (default: fresh empty list)
     :param doors: List of doors (default: fresh empty list)
     :param rooms: List of rooms (default: fresh empty list)
     '''
     self.x = x
     self.y = y
     # Bug fix: the original used mutable default arguments ([]), which
     # are created once and shared by every instance constructed without
     # explicit lists; use None sentinels and allocate fresh lists.
     self.blocks = [] if blocks is None else blocks
     self.doors = [] if doors is None else doors
     self.rooms = [] if rooms is None else rooms
     self.task = task
     State.__init__(
         self, data=[task, (x, y), self.blocks, self.doors, self.rooms])
Example #20
0
 def __init__(self, task, x, y, blocks=None, doors=None, rooms=None):
     '''
     :param task: The given CleanUpTask
     :param x: Agent x coordinate
     :param y: Agent y coordinate
     :param blocks: List of blocks (default: fresh empty list)
     :param doors: List of doors (default: fresh empty list)
     :param rooms: List of rooms (default: fresh empty list)
     '''
     self.x = x
     self.y = y
     # Bug fix: the original used mutable default arguments ([]), which
     # are created once and shared by every instance constructed without
     # explicit lists; use None sentinels and allocate fresh lists.
     self.blocks = [] if blocks is None else blocks
     self.doors = [] if doors is None else doors
     self.rooms = [] if rooms is None else rooms
     self.task = task
     State.__init__(
         self, data=[task, (x, y), self.blocks, self.doors, self.rooms])
Example #21
0
    def __init__(self, num_pegs=3, num_discs=3, gamma=0.95):
        '''
        Args:
            num_pegs (int)
            num_discs (int)
            gamma (float)
        '''
        self.num_pegs = num_pegs
        self.num_discs = num_discs
        # One action per ordered pair of distinct pegs, e.g. "01" moves the
        # top disc of peg 0 onto peg 1.
        peg_ids = range(self.num_pegs)
        HanoiMDP.ACTIONS = [str(src) + str(dst)
                            for src, dst in itertools.product(peg_ids, peg_ids)
                            if src != dst]

        # Setup init state: discs "a", "b", ... all stacked on peg 0; the
        # remaining pegs hold a single space marking "empty".
        init_data = [" " for _ in range(num_pegs)]
        init_data[0] = "".join(chr(97 + i) for i in range(num_discs))
        init_state = State(data=init_data)

        MDP.__init__(self,
                     HanoiMDP.ACTIONS,
                     self._transition_func,
                     self._reward_func,
                     init_state=init_state,
                     gamma=gamma)
Example #22
0
    def make_state(self, x, y):
        '''
        Args:
            x (int)
            y (int)

        Returns:
            (State): [x, y] followed by each rock's features cast to int.
        '''
        features = [x, y]
        for rock in self.rocks:
            # Idiom fix: the original built a list and then wrapped it in a
            # redundant list() call; extend with the int-cast features
            # directly.
            features.extend(int(f) for f in rock)

        return State(data=features)
    def __init__(self, position, theta, velocity, theta_dot, done):
        """
        Args:
            position (np.ndarray)
            theta (float)
            velocity (np.ndarray)
            theta_dot (float)
            done (bool)
        """
        self.position = position
        self.theta = theta
        self.velocity = velocity
        self.theta_dot = theta_dot
        # Flatten (position, angle, velocity, angular velocity) into a
        # fixed-order feature list.
        features = [position[0], position[1], theta,
                    velocity[0], velocity[1], theta_dot]

        State.__init__(self, data=features, is_terminal=done)
    def phi(self, state):
        '''
        Map a ground state to its abstract state, assigning a fresh integer
        id the first time a ground state is seen.

        Args:
            state (State)

        Returns:
            state (State): abstract state carrying the integer id and the
                ground state's terminal flag.
        '''

        # Setup phi for new states. Idiom fixes: test membership on the dict
        # directly (no .keys()) and use truthiness instead of len(...) > 0.
        if state not in self._phi:
            self._phi[state] = max(self._phi.values()) + 1 if self._phi else 1

        abstr_state = State(self._phi[state])
        abstr_state.set_terminal(state.is_terminal())

        return abstr_state
    def _transition_func(self, state, action):
        '''
        Args:
            state (State): data is [x, y, rock_0, rock_1, ...] where each
                rock flag records whether that rock is still worth sampling.
            action (str)

        Returns
            (State)
        '''
        # Terminal states are absorbing.
        if state.is_terminal():
            return state

        if action == "sample":
            # Sample action: mark the rock at the agent's location (if any)
            # as no longer valuable.
            rock_index = self._get_rock_at_agent_loc(state)
            if rock_index is not None:  # Idiom fix: was "!= None".
                # Set to false.
                new_data = state.data[:]
                new_data[rock_index] = False
                next_state = State(data=new_data)
            else:
                next_state = State(data=state.data)

        elif action == "up" and state.data[1] < self.height:
            next_state = State(data=[state.data[0], state.data[1] + 1] + state.data[2:])
        elif action == "down" and state.data[1] > 1:
            next_state = State(data=[state.data[0], state.data[1] - 1] + state.data[2:])
        elif action == "right" and state.data[0] < self.width:
            next_state = State(data=[state.data[0] + 1, state.data[1]] + state.data[2:])
        elif action == "left" and state.data[0] > 1:
            next_state = State(data=[state.data[0] - 1, state.data[1]] + state.data[2:])
        else:
            # Unknown action or a move off the grid: stay in place.
            next_state = State(data=state.data)

        # NOTE(review): the terminal column is hard-coded at x > 7;
        # presumably this should track self.width -- confirm against the
        # MDP's definition before changing.
        if next_state[0] > 7:
            next_state.set_terminal(True)

        return next_state
Example #26
0
 def __init__(self, root_grounded_task, policy_generators, base_mdp):
     '''
     AbstractMDP solver class
     Args:
         root_grounded_task (RootTaskNode)
         policy_generators (list) of type objects (one for each level below the root)
         base_mdp (MDP): Lowest level environment MDP
     '''
     self.root_grounded_task = root_grounded_task
     self.policy_generators = policy_generators
     self.base_mdp = base_mdp
     # One (state, policy) slot per abstraction level, each freshly built.
     self.state_stack = [State() for _ in policy_generators]
     self.policy_stack = [defaultdict() for _ in policy_generators]
     self.max_level = len(self.policy_generators) - 1
     self.action_to_task_map = defaultdict()
     self._construct_action_to_node_map(root_grounded_task)
     self.max_iterate = 100  # YS
Example #27
0
    def _transition_func(self, state, action):
        '''
        Apply the agent's move to the 2048 board held in @state and return
        the resulting state.

        Parameters
        ----------
        state : state
            Represents the old state of the board state

        action : str
            Represents the move that the rl agent makes in order to go to the
            next state.

        Returns
        ----------
        state : state
            Represents the new state after the action beforehand is taken.
        '''
        b = Board(np.asarray(state.data).reshape((4, 4)))
        moved = b.moveAndUpdateBoard(action)
        # Bug fix: the original called State(self, ...), passing the MDP
        # object itself as the State's first constructor argument (its data)
        # and the flattened board as the second; State takes the data alone
        # (cf. the companion __init__, which does State.__init__(self,
        # data=board.flatten().tolist())).
        return State(moved.board.flatten().tolist())
Example #28
0
 def __init__(self,
              num_arms=10,
              distr_family=np.random.normal,
              distr_params=None):
     '''
     Args:
         num_arms (int): Number of arms.
         distr_family (lambda): A function from numpy which, when given
             entities from @distr_params, samples from the distribution family.
         distr_params (dict): If None is given, default mu/sigma for normal
             distribution are initialized randomly.
     '''
     # Arms are named "1" .. "num_arms"; bandits are undiscounted.
     BanditMDP.ACTIONS = [str(arm) for arm in range(1, num_arms + 1)]
     MDP.__init__(self,
                  BanditMDP.ACTIONS,
                  self._transition_func,
                  self._reward_func,
                  init_state=State(1),
                  gamma=1.0)
     self.num_arms = num_arms
     self.distr_family = distr_family
     if distr_params is None:
         distr_params = self.init_distr_params()
     self.distr_params = distr_params
Example #29
0
 def __init__(self, room_number, q, is_terminal=False):
     # q is the automaton's logic state; room_number locates the agent.
     State.__init__(self, data=[room_number, q], is_terminal=is_terminal)
     self.agent_in_room_number = room_number
     self.q = q
 def __init__(self, x, y):
     # Raw coordinates form the state data; the attribute views are
     # rounded to 5 decimal places.
     State.__init__(self, data=[x, y])
     self.x, self.y = round(x, 5), round(y, 5)
Example #31
0
 def __init__(self, floor_number, q, is_terminal=False):
     # q is the automaton's logic state; floor_number locates the agent.
     State.__init__(self, data=[floor_number, q], is_terminal=is_terminal)
     self.agent_on_floor_number = floor_number
     self.q = q
 def __init__(self):
     # Markov game over the fixed rock/paper/scissors action set; the
     # initial state carries no data.
     MarkovGameMDP.__init__(self, RockPaperScissorsMDP.ACTIONS,
                            self._transition_func, self._reward_func,
                            init_state=State())
Example #33
0
 def __init__(self, data=None, is_terminal=False):
     '''
     Args:
         data (list): state features (default: fresh empty list).
         is_terminal (bool)
     '''
     # Bug fix: the original default data=[] is a single list shared by
     # every default-constructed instance, so they all aliased (and could
     # mutate) the same object; use a None sentinel instead.
     self.data = [] if data is None else data
     State.__init__(self, data=self.data, is_terminal=is_terminal)
 def __init__(self, models):
     # The models collection is wrapped as the single state feature.
     State.__init__(self, data=[models])
     self.models = models
 def __init__(self, a_x, a_y, b_x, b_y):
     # Coordinates of the two agents, a and b.
     State.__init__(self, data=[a_x, a_y, b_x, b_y])
     self.a_x, self.a_y = a_x, a_y
     self.b_x, self.b_y = b_x, b_y
 def __init__(self, x, y, color):
     # Raw values form the state data; the coordinate attributes are
     # rounded to 3 decimal places.
     self.color = color
     State.__init__(self, data=[x, y, color])
     self.x, self.y = round(x, 3), round(y, 3)
 def __init__(self, room_number, is_terminal=False, items=None):
     '''
     Args:
         room_number (int)
         is_terminal (bool)
         items (list): items in the state (default: fresh empty list).
     '''
     State.__init__(self, data=[room_number], is_terminal=is_terminal)
     self.agent_in_room_number = room_number
     # Bug fix: the original default items=[] is created once and shared
     # by every instance constructed without an explicit list; use a None
     # sentinel and allocate a fresh list.
     self.items = [] if items is None else items
 def __init__(self, x, y, phi=lambda state: [state.x, state.y]):
     # phi maps a state to its feature list; the default extracts (x, y).
     State.__init__(self, data=[x, y])
     self.x, self.y = x, y
     self.phi = phi
Example #39
0
 def __init__(self, name):
     '''
     Args:
         name (str): state label; the state named 'goal' is terminal.
     '''
     self.name = name
     State.__init__(self, data=name, is_terminal=(name == 'goal'))
Example #40
0
 def __init__(self, data, is_terminal=False):
     # Thin wrapper: forward the feature data and terminal flag to State.
     State.__init__(self, data=data, is_terminal=is_terminal)