Example 1
def get_trained_agent() -> AgentBN:
    # Environment
    environment = ResourceGatheringEpisodic()

    # Agent
    agent = AgentBN(environment=environment, gamma=.9)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.01)

    # Train agent
    agent.train(graph_type=GraphType.SWEEP, limit=10)

    return agent
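A minimal usage sketch of this helper; the v attribute and the state key ((2, 4), (0, 0)) are borrowed from Example 2 below, so treat the key layout as an assumption about this repo's API:

# Hypothetical usage sketch (state key taken from Example 2; assumed layout).
agent = get_trained_agent()
v_s_0 = agent.v[((2, 4), (0, 0))]
print('{} value vectors kept at that state'.format(len(v_s_0)))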
Example 2
def load():
    # Load pre-trained AgentBN checkpoints and inspect the value
    # function at state ((2, 4), (0, 0)).
    # agent_10 = AgentBN.load(
    #     'bn/models/rge_1583857516_0.01.bin'
    # )

    # sorted_v_s_0_10 = sorted(agent_10.v[((2, 4), (0, 0))], key=lambda k: k[0])

    # agent_15 = AgentBN.load(
    #     'bn/models/rge_1583857532_0.01.bin'
    # )

    # sorted_v_s_0_15 = sorted(agent_15.v[((2, 4), (0, 0))], key=lambda k: k[0])

    agent_30: AgentBN = AgentBN.load('bn/models/rge_1583857678_0.01.bin')

    # Value vectors at state ((2, 4), (0, 0)) and their non-dominated subset
    v_s_0_30 = agent_30.v[((2, 4), (0, 0))]
    v_s_0_30_nd = Vector.m3_max(set(v_s_0_30))

    # agent_10625 = AgentBN.load(
    #     filename='bn/models/rge_1583924116_0.01.bin'
    # )

    # sorted_v_s_0_10625 = sorted(agent_10625.v[((2, 4), (0, 0))], key=lambda k: k[0])

    return v_s_0_30_nd
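For context, Vector.m3_max appears to return the non-dominated (Pareto-maximal) subset of the given vectors. A rough pure-Python equivalent of that filtering step, for illustration only (not the repo's actual algorithm):

def non_dominated(vectors):
    # Keep vectors that no other vector Pareto-dominates (illustrative sketch).
    def dominates(a, b):
        # a dominates b: a >= b in every component, a > b in at least one
        return all(x >= y for x, y in zip(a, b)) and any(x > y for x, y in zip(a, b))

    return [v for v in vectors
            if not any(dominates(w, v) for w in vectors if w is not v)]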
Example 3
def main():
    """
    Evaluate manually predefined policies
    :return:
    """
    # Build agent
    agent: AgentBN = AgentBN(environment=ResourceGatheringEpisodic(),
                             gamma=0.9)

    # Policies
    policies = rge_policies.copy()

    # Simulation
    simulation = dict()

    for n, policy in enumerate(policies, start=1):

        # Each policy is a dict (state -> action); evaluate_policy expects
        # a list of (state, action) tuples here.
        policy = list(policy.items())

        # Evaluate policy
        policy_evaluated = agent.evaluate_policy(policy=policy,
                                                 tolerance=0.000001)

        # Update simulation
        simulation.update({n: policy_evaluated})

    print(simulation)
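For this to run, rge_policies must be an iterable of policies, each a dict mapping states to actions (hence the conversion to (state, action) tuples above). A hypothetical one-policy stand-in, with states and action codes invented purely for illustration:

# Hypothetical stand-in for rge_policies (states and action codes invented):
rge_policies = [
    {((2, 4), (0, 0)): 0,
     ((2, 3), (0, 0)): 1},
]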
Example 4
def train_agent() -> Agent:
    # Environment
    # environment = DeepSeaTreasureRightDownStochastic(columns=3)
    # environment = DeepSeaTreasureRightDown(columns=3)
    # environment = PyramidMDPNoBounces(diagonals=3, n_transition=0.95)
    # environment = DeepSeaTreasure()
    environment = ResourceGathering()

    # Agent
    # agent = AgentMPQ(
    #     environment=environment, hv_reference=environment.hv_reference, alpha=0.1, epsilon=0.4, max_steps=1000
    # )
    # agent = AgentMPQ(environment=environment, hv_reference=environment.hv_reference, alpha=0.01)
    agent = AgentBN(environment=environment, gamma=.9)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.01)

    # Train agent
    # agent.train(graph_type=GraphType.SWEEP, tolerance=0.00001)
    agent.train(graph_type=GraphType.SWEEP, limit=13)

    return agent
Example 5
def evaluate_predefined_policies():
    # Build agent
    agent: AgentBN = AgentBN(environment=ResourceGatheringEpisodic(),
                             gamma=0.9)

    # Policies
    policies = rge_policies.copy()

    # Simulation
    simulation = dict()

    for n, policy in enumerate(policies, start=1):

        # Evaluate policy
        policy_evaluated = agent.evaluate_policy(policy=policy,
                                                 tolerance=0.000001)

        # Update simulation
        simulation.update({n: policy_evaluated})

    print(simulation)
Example 6
def draft_w():
    gamma = .9

    # for decimal_precision in [0.01, 0.005, 1, 0.5, 0.05, 0.1]:
    for decimal_precision in [0.005]:

        # Set the number of decimals allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)
        tolerance = decimal_precision

        for i in ['full']:  # variant label used in the log message and the commented dumps call

            # Create environment
            for environment in [ResourceGathering()]:

                # Create agent
                agent_bn = AgentBN(environment=environment,
                                   convergence_graph=False,
                                   gamma=gamma)

                # Time train
                t0 = time.time()

                # Log the variant and tolerance being trained
                print('{} cols\ntolerance: {}'.format(i, tolerance))

                agent_bn.train(graph_type=GraphType.SWEEP,
                               tolerance=tolerance,
                               sweeps_dump=30)

                # Calc total time
                total_time = time.time() - t0

                # Convert value vectors to plain lists for serialization
                vectors = {
                    key: [vector.tolist() for vector in v_list]
                    for key, v_list in agent_bn.v.items()
                }

                # Prepare full data to dump
                data = {
                    'time': '{}s'.format(total_time),
                    'memory': {
                        'v_s_0': len(agent_bn.v[environment.initial_state]),
                        'full': sum(len(vs) for vs in agent_bn.v.values())
                    },
                    'vectors': vectors
                }

                # Configuration of environment
                environment_info = vars(environment).copy()
                environment_info.pop('_action_space', None)
                environment_info.pop('np_random', None)

                # Configuration of agent
                agent_info = {
                    'gamma': agent_bn.gamma,
                    'initial_q_value': agent_bn.initial_q_value,
                    'initial_seed': agent_bn.initial_seed,
                    'interval_to_get_data': agent_bn.interval_to_get_data,
                    'total_sweeps': agent_bn.total_sweeps,
                    'tolerance': tolerance
                }

                # Extra data
                data.update({'environment': environment_info})
                data.update({'agent': agent_info})

                # Dump partial execution results
                # dumps(data=data, columns=i, environment=environment)
                dumps(data=data, environment=environment)

                # Dump agent
                agent_bn.save()
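dumps looks like a repo helper that serializes the results dict together with environment metadata. A minimal stand-in, assuming JSON output to a timestamped file (the filename scheme and key handling are invented here):

import json
import time

def _stringify_keys(obj):
    # JSON requires string keys; the states used as keys here are tuples,
    # so convert them recursively.
    if isinstance(obj, dict):
        return {str(k): _stringify_keys(v) for k, v in obj.items()}
    return obj

def dumps(data, environment, **kwargs):
    # Hypothetical stand-in: write the results dict to a timestamped JSON file.
    filename = '{}_{}.json'.format(type(environment).__name__, int(time.time()))
    with open(filename, 'w') as f:
        json.dump(_stringify_keys(data), f, indent=2, default=str)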
Example 7
def main():
    # Define gamma
    gamma = .9
    # Dump a checkpoint every 30 sweeps
    sweeps_dumps = 30

    for decimal_precision in [0.01, 0.005]:
        # Set the number of decimals allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        # Use the same tolerance as the decimal precision (it can be changed independently)
        tolerance = decimal_precision

        # Create environment
        environment = ResourceGatheringEpisodic()

        # Create agent
        agent_bn = AgentBN(environment=environment, convergence_graph=False, gamma=gamma)

        # Time train
        t0 = time.time()

        print('Training with tolerance: {}...'.format(tolerance))

        agent_bn.train(graph_type=GraphType.SWEEP, tolerance=tolerance, sweeps_dump=sweeps_dumps)

        # Calc total time
        total_time = time.time() - t0

        # Convert value vectors to plain lists for serialization
        vectors = {key: [vector.tolist() for vector in v_list] for key, v_list in agent_bn.v.items()}

        # Prepare full data to dump
        data = {
            'time': '{}s'.format(total_time),
            'memory': {
                'v_s_0': len(agent_bn.v[environment.initial_state]),
                'full': sum(len(vs) for vs in agent_bn.v.values())
            },
            'vectors': vectors
        }

        # Configuration of environment
        environment_info = vars(environment).copy()
        environment_info.pop('_action_space', None)
        environment_info.pop('np_random', None)

        # Configuration of agent
        agent_info = {
            'gamma': agent_bn.gamma,
            'initial_q_value': agent_bn.initial_q_value,
            'initial_seed': agent_bn.initial_seed,
            'interval_to_get_data': agent_bn.interval_to_get_data,
            'total_sweeps': agent_bn.total_sweeps,
            'tolerance': tolerance
        }

        # Extra data
        data.update({'environment': environment_info})
        data.update({'agent': agent_info})

        # Dump partial execution results
        dumps(data=data, environment=environment)

        # Dump binary information
        agent_bn.save()