Example #1
def load():
    # agent_10 = AgentBN.load(
    #     'bn/models/rge_1583857516_0.01.bin'
    # )

    # sorted_v_s_0_10 = sorted(agent_10.v[((2, 4), (0, 0))], key=lambda k: k[0])

    # agent_15 = AgentBN.load(
    #     'bn/models/rge_1583857532_0.01.bin'
    # )

    # sorted_v_s_0_15 = sorted(agent_15.v[((2, 4), (0, 0))], key=lambda k: k[0])

    # Load a previously trained AgentBN from a saved model file.
    agent_30: AgentBN = AgentBN.load('bn/models/rge_1583857678_0.01.bin')

    # Value vectors for the initial state, reduced to the non-dominated set.
    v_s_0_30 = agent_30.v[((2, 4), (0, 0))]
    v_s_0_30_nd = Vector.m3_max(set(v_s_0_30))

    # agent_10625 = AgentBN.load(
    #     filename='bn/models/rge_1583924116_0.01.bin'
    # )

    # sorted_v_s_0_10625 = sorted(agent_10625.v[((2, 4), (0, 0))], key=lambda k: k[0])

    pass
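
The snippet above relies on `Vector.m3_max` to reduce a set of reward vectors to its maximal elements. The library's implementation is not shown here; judging by how it is used, it returns the non-dominated (Pareto-maximal) subset. A minimal standalone sketch of that idea, using plain lists of floats instead of the project's `Vector` class:

def pareto_max(vectors):
    # Keep only the vectors not dominated by any other vector.
    # v dominates w when v >= w component-wise and v > w in at least one
    # component. Illustrative only; the real Vector.m3_max may handle
    # ties and decimal precision differently.
    def dominates(v, w):
        return (all(a >= b for a, b in zip(v, w))
                and any(a > b for a, b in zip(v, w)))

    return [v for v in vectors
            if not any(dominates(w, v) for w in vectors if w is not v)]


print(pareto_max([[-0.33, 0.0, 0.0], [0.0, 0.0, 0.39], [0.0, 0.0, 0.0]]))
# -> [[0.0, 0.0, 0.39]]
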
Example #2
def main():
    # Get trained agent
    print('Training agent...')
    agent: AgentBN = get_trained_agent()

    # Set initial state
    initial_state = ((2, 4), (0, 0), False)

    # Initial vectors
    v_s_0 = agent.v[initial_state]
    vectors = Vector.m3_max(set(v_s_0))

    # Show information
    print('Vectors obtained after the m3_max algorithm:')
    print(vectors, end='\n\n')

    # Define a tolerance
    decimal_precision = 0.0000001

    # Simulation
    simulation = dict()

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=decimal_precision)

    print('Evaluating the recovered policies...')

    # For each vector
    for vector in vectors:
        # Specify objective vector
        objective_vector = vector.copy()

        print('Recovering policy for objective vector: {}...'.format(
            objective_vector))

        # Get simulation from this agent
        policy = agent.recover_policy(initial_state=initial_state,
                                      objective_vector=objective_vector,
                                      iterations_limit=agent.total_sweeps)

        print('Evaluating the obtained policy...', end='\n\n')

        # Train until converge with `decimal_precision` tolerance.
        policy_evaluated = agent.evaluate_policy(policy=policy,
                                                 tolerance=decimal_precision)

        # Save the policy and its evaluation.
        simulation.update({objective_vector: (policy, policy_evaluated)})

    print(simulation)
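
The final `print(simulation)` dumps the raw dictionary. If a more readable report is wanted, a small helper along the following lines could be used; `print_simulation` is a hypothetical name, not part of the original code:

def print_simulation(simulation: dict) -> None:
    # Pretty-print the {objective_vector: (policy, evaluation)} mapping
    # built in main(). Illustrative helper only.
    for objective_vector, (policy, policy_evaluated) in simulation.items():
        print('Objective vector: {}'.format(objective_vector))
        print('  Recovered policy: {}'.format(policy))
        print('  Policy evaluation: {}'.format(policy_evaluated))
        print()
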
Example #3
def main():
    # Get trained agent
    agent: AgentBN = get_trained_agent()

    # Set initial state
    initial_state = ((2, 4), (0, 0))

    # agent: AgentBN = AgentBN.load(
    #     filename='bn/models/rg_1584437328_0.005.bin'
    # )

    v_s_0 = agent.v[initial_state]
    vectors = Vector.m3_max(set(v_s_0))

    # Simulation
    simulation = dict()

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=0.0000001)

    for vector in vectors:
        # Recreate the index objective vector.
        # objective_vector = IndexVector(
        #     index=vector, vector=trained_agent.v[initial_state][vector]
        # )

        objective_vector = vector.copy()

        # Get simulation from this agent
        policy = agent.recover_policy(initial_state=initial_state,
                                      objective_vector=objective_vector,
                                      iterations_limit=agent.total_sweeps)

        policy_evaluated = agent.evaluate_policy(policy=policy,
                                                 tolerance=0.0000001)

        simulation.update({objective_vector: (policy, policy_evaluated)})

    print(simulation)
Example #4
from pathlib import Path

from models import Vector

vectors_10 = Vector.m3_max(
    set(
        map(Vector, [[-0.33, 0.0, 0.0], [-0.16, 0.39, 0.0], [-0.15, 0.0, 0.32],
                     [-0.14, 0.39, 0.0], [-0.1, 0.0, 0.35], [-0.04, 0.35, 0.0],
                     [0.0, 0.0, 0.39], [0.0, 0.0, 0.0]])))

vectors_15 = Vector.m3_max(
    set(
        map(Vector,
            [[-0.45, 0.0, 0.0], [-0.44, 0.0, 0.02], [-0.42, 0.06, 0.02],
             [-0.39, 0.14, 0.0], [-0.38, 0.15, 0.02], [-0.35, 0.14, 0.14],
             [-0.34, 0.17, 0.13], [-0.33, 0.0, 0.17], [-0.33, 0.14, 0.17],
             [-0.33, 0.16, 0.16], [-0.3, 0.27, 0.0], [-0.29, 0.23, 0.15],
             [-0.29, 0.22, 0.17], [-0.21, 0.27, 0.24], [-0.21, 0.25, 0.27],
             [-0.2, 0.23, 0.28], [-0.16, 0.44, 0.0], [-0.16, 0.03, 0.33],
             [-0.16, 0.29, 0.28], [-0.16, 0.28, 0.29], [-0.16, 0.43, 0.02],
             [-0.15, 0.44, 0.0], [-0.15, 0.0, 0.34], [-0.14, 0.43, 0.02],
             [-0.14, 0.28, 0.29], [-0.14, 0.29, 0.28], [-0.14, 0.26, 0.3],
             [-0.09, 0.0, 0.37], [-0.05, 0.24, 0.23], [-0.04, 0.23, 0.23],
             [0.0, 0.0, 0.39], [0.0, 0.0, 0.0], [0.0, 0.32, 0.0]])))

vectors_30 = Vector.m3_max(
    set(
        map(Vector,
            [[-0.57, 0.04, 0.04], [-0.57, 0.0, 0.0], [-0.57, 0.0, 0.04],
             [-0.57, 0.04, 0.0], [-0.53, 0.09, 0.04], [-0.52, 0.0, 0.09],
             [-0.52, 0.03, 0.09], [-0.51, 0.07, 0.09], [-0.24, 0.27, 0.26],
Example #5
def filter_vectors(vectors: set) -> list:
    # ND[vectors]: keep only the non-dominated vectors.
    return Vector.m3_max(vectors=vectors)
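
A possible call of this wrapper, reusing the `Vector` construction pattern from Example #4 and assuming `filter_vectors` and `models.Vector` are importable; the numeric values below are made up for illustration:

from models import Vector

candidate_vectors = set(
    map(Vector, [[-0.33, 0.0, 0.0], [0.0, 0.0, 0.39], [0.0, 0.0, 0.0]]))

# Keep only the non-dominated vectors (ND of the set).
non_dominated = filter_vectors(vectors=candidate_vectors)
print(non_dominated)
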
Example #6
    def simulate_state(self, state: object) -> set:
        """
        Given an state iterates about itself, extracting all possible actions, the reachable states from it, and
        calculating it reward.
        :param state:
        :return:
        """

        # Set current state
        self.environment.current_state = state

        # Set of vectors
        vectors = set()

        # For each possible action
        for action in self.environment.action_space:

            # Get all reachable states
            reachable_states = self.environment.reachable_states(state=state,
                                                                 action=action)

            # Set of vectors
            total_vectors = set()

            # Associate states and vectors
            associate_states = list()
            associate_vectors = list()

            for reachable_state in reachable_states:

                # If the reachable state is unknown, initialize it with a zero-vector set.
                if reachable_state not in self.states_vectors:
                    self.states_vectors.update({
                        reachable_state:
                        {self.environment.default_reward.zero_vector}
                    })

                # Calculate reward
                reward = self.environment.transition_reward(
                    state=state, action=action, next_state=reachable_state)

                # Add the reward to the vectors previously stored for the reachable state
                accumulated_vectors = set(
                    map(lambda x: x + reward,
                        self.states_vectors[reachable_state]))

                associate_states.append(reachable_state)
                associate_vectors.append(accumulated_vectors)

                # Add current vectors to total vectors
                total_vectors = total_vectors.union(accumulated_vectors)

            self.states_vectors.update({state: total_vectors})

            # For each combination of vectors (one per reachable state)
            for product_vectors in itertools.product(*associate_vectors):

                # Extract zero vector
                vector = self.environment.default_reward.zero_vector

                for i, reward in enumerate(product_vectors):
                    # Next state
                    reachable_state = associate_states[i]

                    # Calculate probability
                    probability = self.environment.transition_probability(
                        state=state, action=action, next_state=reachable_state)

                    # Accumulate the probability-weighted vector
                    vector += (reward * probability)

                # Add to set of vectors
                vectors.add(vector)

        if self.limited_precision:
            vectors = map(
                lambda x: un.round_with_precision(x, Vector.decimal_precision),
                vectors)

        # Return all vectors found
        return set(Vector.m3_max(vectors))
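
The loop over `itertools.product(*associate_vectors)` builds, for every combination of per-state vectors, an expectation over the reachable states: each chosen vector (which already includes the transition reward) is weighted by its transition probability and summed. A stripped-down sketch of that step with plain floats and hypothetical probabilities, instead of the environment's `transition_probability` and the `Vector` class:

import itertools

# Hypothetical data: two reachable states with two candidate values each,
# and their transition probabilities (illustrative numbers only).
associate_states = ['s1', 's2']
associate_vectors = [{1.0, 2.0}, {0.5, 1.5}]
probabilities = {'s1': 0.7, 's2': 0.3}

expected_values = set()

for product_values in itertools.product(*associate_vectors):
    value = 0.0
    for i, reward in enumerate(product_values):
        # Weight each candidate value by its state's transition probability.
        value += reward * probabilities[associate_states[i]]
    expected_values.add(value)

print(expected_values)  # one expected value per combination of candidates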