コード例 #1
0
ファイル: Experiment.py プロジェクト: zhexiaozhe/rlpy
    def evaluate(self, total_steps, episode_number, visualize=0):
        """
        Evaluate the current agent within an experiment

        :param total_steps: (int)
                     number of steps used in learning so far
        :param episode_number: (int)
                        number of episodes used in learning so far
        """
        # TODO resolve this hack
        if className(self.agent) == 'PolicyEvaluation':
            # Policy Evaluation Case
            self.result = self.agent.STATS
            return

        random_state = np.random.get_state()
        #random_state_domain = copy(self.domain.random_state)
        elapsedTime = deltaT(self.start_time)
        performance_return = 0.
        performance_steps = 0.
        performance_term = 0.
        performance_discounted_return = 0.
        for j in xrange(self.checks_per_policy):
            p_ret, p_step, p_term, p_dret = self.performanceRun(
                total_steps, visualize=visualize > j)
            performance_return += p_ret
            performance_steps += p_step
            performance_term += p_term
            performance_discounted_return += p_dret
        performance_return /= self.checks_per_policy
        performance_steps /= self.checks_per_policy
        performance_term /= self.checks_per_policy
        performance_discounted_return /= self.checks_per_policy
        self.result["learning_steps"].append(total_steps)
        self.result["return"].append(performance_return)
        self.result["learning_time"].append(self.elapsed_time)
        self.result["num_features"].append(
            self.agent.representation.features_num)
        self.result["steps"].append(performance_steps)
        self.result["terminated"].append(performance_term)
        self.result["learning_episode"].append(episode_number)
        self.result["discounted_return"].append(performance_discounted_return)
        # reset start time such that performanceRuns don't count
        self.start_time = clock() - elapsedTime
        if total_steps > 0:
            remaining = hhmmss(elapsedTime * (self.max_steps - total_steps) /
                               total_steps)
        else:
            remaining = "?"
        self.logger.info(
            self.performance_log_template.format(
                total_steps=total_steps,
                elapsed=hhmmss(elapsedTime),
                remaining=remaining,
                totreturn=performance_return,
                steps=performance_steps,
                num_feat=self.agent.representation.features_num))

        np.random.set_state(random_state)
コード例 #2
0
ファイル: Experiment.py プロジェクト: smcgregor/rlpy
    def evaluate(self, total_steps, episode_number, visualize=0):
        """
        Evaluate the current agent within an experiment

        :param total_steps: (int)
                     number of steps used in learning so far
        :param episode_number: (int)
                        number of episodes used in learning so far
        """
        # TODO resolve this hack
        if className(self.agent) == 'PolicyEvaluation':
            # Policy Evaluation Case
            self.result = self.agent.STATS
            return

        random_state = np.random.get_state()
        #random_state_domain = copy(self.domain.random_state)
        elapsedTime = deltaT(self.start_time)
        performance_return = 0.
        performance_steps = 0.
        performance_term = 0.
        performance_discounted_return = 0.
        for j in xrange(self.checks_per_policy):
            p_ret, p_step, p_term, p_dret = self.performanceRun(
                total_steps, visualize=visualize > j)
            performance_return += p_ret
            performance_steps += p_step
            performance_term += p_term
            performance_discounted_return += p_dret
        performance_return /= self.checks_per_policy
        performance_steps /= self.checks_per_policy
        performance_term /= self.checks_per_policy
        performance_discounted_return /= self.checks_per_policy
        self.result["learning_steps"].append(total_steps)
        self.result["return"].append(performance_return)
        self.result["learning_time"].append(self.elapsed_time)
        self.result["num_features"].append(
            self.agent.representation.features_num)
        self.result["steps"].append(performance_steps)
        self.result["terminated"].append(performance_term)
        self.result["learning_episode"].append(episode_number)
        self.result["discounted_return"].append(performance_discounted_return)
        # reset start time such that performanceRuns don't count
        self.start_time = clock() - elapsedTime
        if total_steps > 0:
            remaining = hhmmss(
                elapsedTime * (self.max_steps - total_steps) / total_steps)
        else:
            remaining = "?"
        self.logger.info(
            self.performance_log_template.format(total_steps=total_steps,
                                                 elapsed=hhmmss(
                                                     elapsedTime),
                                                 remaining=remaining,
                                                 totreturn=performance_return,
                                                 steps=performance_steps,
                                                 num_feat=self.agent.representation.features_num))

        np.random.set_state(random_state)
コード例 #3
0
ファイル: MDPSolver.py プロジェクト: smcgregor/rlpy
    def IsTabularRepresentation(self):
        '''
        Check to see if the representation is Tabular as Policy Iteration and Value Iteration only work with
        Tabular representation
        '''
        return className(self.representation) == 'Tabular'

        return True
コード例 #4
0
ファイル: MDPSolver.py プロジェクト: okkhoy/rlpy
    def IsTabularRepresentation(self):
        '''
        Check to see if the representation is Tabular as Policy Iteration and Value Iteration only work with
        Tabular representation
        '''
        return className(self.representation) == 'Tabular'

        return True
コード例 #5
0
 def __init__(
         self, job_id, representation, domain, planning_time=np.inf, convergence_threshold=.005,
         ns_samples=100, project_path='.', log_interval=500, show=False, epsilon=.1):
     super(
         TrajectoryBasedValueIteration,
         self).__init__(job_id,
                        representation,
                        domain,
                        planning_time,
                        convergence_threshold,
                        ns_samples,
                        project_path,
                        log_interval,
                        show)
     self.epsilon = epsilon
     if className(representation) == 'Tabular':
         self.alpha = 1
コード例 #6
0
 def __init__(self,
              job_id,
              representation,
              domain,
              planning_time=np.inf,
              convergence_threshold=.005,
              ns_samples=100,
              project_path='.',
              log_interval=500,
              show=False,
              epsilon=.1):
     super(TrajectoryBasedValueIteration,
           self).__init__(job_id, representation, domain, planning_time,
                          convergence_threshold, ns_samples, project_path,
                          log_interval, show)
     self.epsilon = epsilon
     if className(representation) == 'Tabular':
         self.alpha = 1
コード例 #7
0
    def pi2(self, s, terminal, p_actions):
        domain = self.representation.domain
        if not className(domain) in self.supportedDomains:
            print("ERROR: There is no fixed policy defined for %s" %
                  className(domain))
            return None

        if className(domain) == 'GridWorld':
            # Actions are Up, Down, Left, Right
            if not self.policyName in self.gridWorldPolicyNames:
                print("Error: There is no GridWorld policy with name %s" %
                      self.policyName)
                return None

            if self.policyName == 'cw_circle':
                # Cycle through actions, starting with 0, causing agent to go
                # in loop
                if not hasattr(self, "curAction"):
                    # it doesn't exist yet, so initialize it [immediately
                    # incremented]
                    self.curAction = 0
                while (not (self.curAction in domain.possibleActions(s))):
                    # We can't do something simple because of the order in which actions are defined
                    # must do switch statement
                    if self.curAction == 0:  # up
                        self.curAction = 3
                    elif self.curAction == 3:  # right
                        self.curAction = 1
                    elif self.curAction == 1:  # down
                        self.curAction = 2
                    elif self.curAction == 2:  # left
                        self.curAction = 0
                    else:
                        print(
                            'Something terrible happened...got an invalid action on GridWorld Fixed Policy'
                        )
    #                 self.curAction = self.curAction % domain.actions_num
            elif self.policyName == 'ccw_circle':
                # Cycle through actions, starting with 0, causing agent to go
                # in loop
                if not hasattr(self, "curAction"):
                    # it doesn't exist yet, so initialize it
                    self.curAction = 1
                while (not (self.curAction in domain.possibleActions(s))):
                    # We can't do something simple because of the order in which actions are defined
                    # must do switch statement
                    if self.curAction == 3:  # right
                        self.curAction = 0
                    elif self.curAction == 0:  # up
                        self.curAction = 2
                    elif self.curAction == 2:  # left
                        self.curAction = 1
                    elif self.curAction == 1:  # down
                        self.curAction = 3
                    else:
                        print(
                            'Something terrible happened...got an invalid action on GridWorld Fixed Policy'
                        )

    #                 self.curAction = self.curAction % domain.actions_num

            else:
                print(
                    "Error: No policy defined with name %s, but listed in gridWorldPolicyNames"
                    % self.policyName)
                print(
                    "You need to create a switch statement for the policy name above, or remove it from gridWorldPolicyNames"
                )
                return None
            return self.curAction

# Cycle through actions, starting with 0, causing agent to go in other direction
#             if not hasattr(pi, "curAction"):
# pi.curAction = domain.actions_num-1  # it doesn't exist yet, so initialize it
#             if not(pi.curAction in domain.possibleActions(s)):
#                 pi.curAction -= 1
#                 if pi.curAction < 0: pi.curAction = domain.actions_num-1

        if className(domain) == 'InfCartPoleBalance':
            # Fixed policy rotate the pendulum in the opposite direction of the
            # thetadot
            theta, thetadot = s
            if thetadot > 0:
                return 2
            else:
                return 0
        if className(domain) == 'BlocksWorld':
            # Fixed policy rotate the blocksworld = Optimal Policy (Always pick the next piece of the tower and move it to the tower
            # Policy: Identify the top of the tower.
            # move the next piece on the tower with 95% chance 5% take a random
            # action

            # Random Action with some probability
            # TODO fix isTerminal use here
            if self.random_state.rand() < .3 or domain.isTerminal():
                return randSet(domain.possibleActions(s))

            # non-Random Policy
            # next_block is the block that should be stacked on the top of the tower
            # wrong_block is the highest block stacked on the top of the next_block
            # Wrong_tower_block is the highest stacked on the top of the tower
            blocks = domain.blocks
            # Length of the tower assumed to be built correctly.
            correct_tower_size = 0
            while True:
                # Check the next block
                block = correct_tower_size
                if (block == 0 and domain.on_table(block, s)) or domain.on(
                        block, block - 1, s):
                    # This block is on the right position, check the next block
                    correct_tower_size += 1
                else:
                    # print s
                    # print "Incorrect block:", block
                    # The block is on the wrong place.
                    # 1. Check if the tower is empty => If not take one block from the tower and put it on the table
                    # 2. check to see if this wrong block is empty => If not put one block from its stack and put on the table
                    # 3. Otherwise move this block on the tower

                    ###################
                    # 1
                    ###################
                    # If the first block is in the wrong place, then the tower
                    # top which is table is empty by definition
                    if block != 0:
                        ideal_tower_top = block - 1
                        tower_top = domain.towerTop(ideal_tower_top, s)
                        if tower_top != ideal_tower_top:
                            # There is a wrong block there hence we should put
                            # it on the table first
                            return (
                                # put the top of the tower on the table since
                                # it is not correct
                                domain.getActionPutAonTable(tower_top))
                    ###################
                    # 2
                    ###################
                    block_top = domain.towerTop(block, s)
                    if block_top != block:
                        # The target block to be stacked is not empty
                        return domain.getActionPutAonTable(block_top)
                    ###################
                    # 3
                    ###################
                    if block == 0:
                        return domain.getActionPutAonTable(block)
                    else:
                        return domain.getActionPutAonB(block, block - 1)
        if className(domain) == 'IntruderMonitoring':
            # Each UAV assign themselves to a target
            # Each UAV finds the closest danger zone to its target and go towards there.
            # If UAVs_num > Target, the rest will hold position
            # Move all agents based on the taken action
            agents = np.array(s[:domain.NUMBER_OF_AGENTS * 2].reshape(-1, 2))
            targets = np.array(s[domain.NUMBER_OF_AGENTS * 2:].reshape(-1, 2))
            zones = domain.danger_zone_locations
            # Default action is hold
            actions = np.ones(len(agents), dtype=np.integer) * 4
            planned_agents_num = min(len(agents), len(targets))
            for i in range(planned_agents_num):
                # Find cloasest zone (manhattan) to the corresponding target
                target = targets[i, :]
                distances = np.sum(
                    np.abs(np.tile(target, (len(zones), 1)) - zones), axis=1)
                z_row, z_col = zones[np.argmin(distances), :]
                # find the valid action
                a_row, a_col = agents[i, :]
                a = 4  # hold as a default action
                if a_row > z_row:
                    a = 0  # up
                if a_row < z_row:
                    a = 1  # down
                if a_col > z_col:
                    a = 2  # left
                if a_col < z_col:
                    a = 3  # right
                actions[i] = a
#                print "Agent=", agents[i,:]
#                print "Target", target
#                print "Zone", zones[argmin(distances),:]
#                print "Action", a
#                print '============'
            return vec2id(actions, np.ones(len(agents), dtype=np.integer) * 5)
        if className(domain) == 'SystemAdministrator':
            # Select a broken computer and reset it
            brokenComputers = np.where(s == 0)[0]
            if len(brokenComputers):
                return randSet(brokenComputers)
            else:
                return domain.computers_num
        if className(domain) == 'MountainCar':
            # Accelerate in the direction of the valley
            # WORK IN PROGRESS
            x, xdot = s
            if xdot > 0:
                return 2
            else:
                return 0
        if className(domain) == 'PST':
            # One stays at comm, n-1 stay at target area. Whenever fuel is
            # lower than reaching the base the move back
            print(s)
            s = domain.state2Struct(s)
            uavs = domain.NUM_UAV
            print(s)
            return vec2id(np.zeros(uavs), np.ones(uavs) * 3)
コード例 #8
0
ファイル: Policy.py プロジェクト: okkhoy/rlpy
 def printAll(self):
     """ Prints all class information to console. """
     print(className(self))
     print('=======================================')
     for property, value in vars(self).items():
         print(property, ": ", value)
コード例 #9
0
ファイル: FixedPolicy.py プロジェクト: smcgregor/rlpy
    def pi2(self, s, terminal, p_actions):
        domain = self.representation.domain
        if not className(domain) in self.supportedDomains:
            print "ERROR: There is no fixed policy defined for %s" % className(domain)
            return None

        if className(domain) == 'GridWorld':
            # Actions are Up, Down, Left, Right
            if not self.policyName in self.gridWorldPolicyNames:
                print "Error: There is no GridWorld policy with name %s" % self.policyName
                return None

            if self.policyName == 'cw_circle':
                # Cycle through actions, starting with 0, causing agent to go
                # in loop
                if not hasattr(self, "curAction"):
                    # it doesn't exist yet, so initialize it [immediately
                    # incremented]
                    self.curAction = 0
                while (not(self.curAction in domain.possibleActions(s))):
                    # We can't do something simple because of the order in which actions are defined
                    # must do switch statement
                    if self.curAction == 0:  # up
                        self.curAction = 3
                    elif self.curAction == 3:  # right
                        self.curAction = 1
                    elif self.curAction == 1:  # down
                        self.curAction = 2
                    elif self.curAction == 2:  # left
                        self.curAction = 0
                    else:
                        print 'Something terrible happened...got an invalid action on GridWorld Fixed Policy'
    #                 self.curAction = self.curAction % domain.actions_num
            elif self.policyName == 'ccw_circle':
                # Cycle through actions, starting with 0, causing agent to go
                # in loop
                if not hasattr(self, "curAction"):
                    # it doesn't exist yet, so initialize it
                    self.curAction = 1
                while (not(self.curAction in domain.possibleActions(s))):
                    # We can't do something simple because of the order in which actions are defined
                    # must do switch statement
                    if self.curAction == 3:  # right
                        self.curAction = 0
                    elif self.curAction == 0:  # up
                        self.curAction = 2
                    elif self.curAction == 2:  # left
                        self.curAction = 1
                    elif self.curAction == 1:  # down
                        self.curAction = 3
                    else:
                        print 'Something terrible happened...got an invalid action on GridWorld Fixed Policy'
    #                 self.curAction = self.curAction % domain.actions_num

            else:
                print "Error: No policy defined with name %s, but listed in gridWorldPolicyNames" % self.policyName
                print "You need to create a switch statement for the policy name above, or remove it from gridWorldPolicyNames"
                return None
            return self.curAction

# Cycle through actions, starting with 0, causing agent to go in other direction
#             if not hasattr(pi, "curAction"):
# pi.curAction = domain.actions_num-1  # it doesn't exist yet, so initialize it
#             if not(pi.curAction in domain.possibleActions(s)):
#                 pi.curAction -= 1
#                 if pi.curAction < 0: pi.curAction = domain.actions_num-1

        if className(domain) == 'InfCartPoleBalance':
            # Fixed policy rotate the pendulum in the opposite direction of the
            # thetadot
            theta, thetadot = s
            if thetadot > 0:
                return 2
            else:
                return 0
        if className(domain) == 'BlocksWorld':
            # Fixed policy rotate the blocksworld = Optimal Policy (Always pick the next piece of the tower and move it to the tower
            # Policy: Identify the top of the tower.
            # move the next piece on the tower with 95% chance 5% take a random
            # action

            # Random Action with some probability
            # TODO fix isTerminal use here
            if np.random.rand() < .3 or domain.isTerminal():
                return randSet(domain.possibleActions(s))

            # non-Random Policy
            # next_block is the block that should be stacked on the top of the tower
            # wrong_block is the highest block stacked on the top of the next_block
            # Wrong_tower_block is the highest stacked on the top of the tower
            blocks = domain.blocks
            # Length of the tower assumed to be built correctly.
            correct_tower_size = 0
            while True:
                # Check the next block
                block = correct_tower_size
                if (block == 0 and domain.on_table(block, s)) or domain.on(block, block - 1, s):
                    # This block is on the right position, check the next block
                    correct_tower_size += 1
                else:
                    # print s
                    # print "Incorrect block:", block
                    # The block is on the wrong place.
                    # 1. Check if the tower is empty => If not take one block from the tower and put it on the table
                    # 2. check to see if this wrong block is empty => If not put one block from its stack and put on the table
                    # 3. Otherwise move this block on the tower

                    ###################
                    # 1
                    ###################
                    # If the first block is in the wrong place, then the tower
                    # top which is table is empty by definition
                    if block != 0:
                        ideal_tower_top = block - 1
                        tower_top = domain.towerTop(ideal_tower_top, s)
                        if tower_top != ideal_tower_top:
                            # There is a wrong block there hence we should put
                            # it on the table first
                            return (
                                # put the top of the tower on the table since
                                # it is not correct
                                domain.getActionPutAonTable(tower_top)
                            )
                    ###################
                    # 2
                    ###################
                    block_top = domain.towerTop(block, s)
                    if block_top != block:
                        # The target block to be stacked is not empty
                        return domain.getActionPutAonTable(block_top)
                    ###################
                    # 3
                    ###################
                    if block == 0:
                        return domain.getActionPutAonTable(block)
                    else:
                        return domain.getActionPutAonB(block, block - 1)
        if className(domain) == 'IntruderMonitoring':
            # Each UAV assign themselves to a target
            # Each UAV finds the closest danger zone to its target and go towards there.
            # If UAVs_num > Target, the rest will hold position
            # Move all agents based on the taken action
            agents = np.array(s[:domain.NUMBER_OF_AGENTS * 2].reshape(-1, 2))
            targets = np.array(s[domain.NUMBER_OF_AGENTS * 2:].reshape(-1, 2))
            zones = domain.danger_zone_locations
            # Default action is hold
            actions = np.ones(len(agents), dtype=np.integer) * 4
            planned_agents_num = min(len(agents), len(targets))
            for i in xrange(planned_agents_num):
                # Find cloasest zone (manhattan) to the corresponding target
                target = targets[i, :]
                distances = np.sum(
                    np.abs(np.tile(target, (len(zones), 1)) - zones), axis=1)
                z_row, z_col = zones[np.argmin(distances), :]
                # find the valid action
                a_row, a_col = agents[i, :]
                a = 4  # hold as a default action
                if a_row > z_row:
                    a = 0  # up
                if a_row < z_row:
                    a = 1  # down
                if a_col > z_col:
                    a = 2  # left
                if a_col < z_col:
                    a = 3  # right
                actions[i] = a
#                print "Agent=", agents[i,:]
#                print "Target", target
#                print "Zone", zones[argmin(distances),:]
#                print "Action", a
#                print '============'
            return vec2id(actions, np.ones(len(agents), dtype=np.integer) * 5)
        if className(domain) == 'SystemAdministrator':
            # Select a broken computer and reset it
            brokenComputers = np.where(s == 0)[0]
            if len(brokenComputers):
                return randSet(brokenComputers)
            else:
                return domain.computers_num
        if className(domain) == 'MountainCar':
            # Accelerate in the direction of the valley
            # WORK IN PROGRESS
            x, xdot = s
            if xdot > 0:
                return 2
            else:
                return 0
        if className(domain) == 'PST':
            # One stays at comm, n-1 stay at target area. Whenever fuel is
            # lower than reaching the base the move back
            print s
            s = domain.state2Struct(s)
            uavs = domain.NUM_UAV
            print s
            return vec2id(np.zeros(uavs), np.ones(uavs) * 3)
コード例 #10
0
ファイル: Policy.py プロジェクト: zhuzhenping/rlpy
 def printAll(self):
     """ Prints all class information to console. """
     print(className(self))
     print('=======================================')
     for property, value in vars(self).items():
         print(property, ": ", value)