Example #1
def multi_env_single_env_training(folder, datetime, primary_writer, task_pricing_agents, resource_weighting_agents,
                                  multi_env_training: bool = True, total_episodes: int = 600, eval_freq: int = 10):
    """
    Trains the agents with either multiple environments or a single environment

    Args:
        folder: Training folder name
        datetime: The datetime of the training
        primary_writer: The primary summary writer for the multi-environment results
        task_pricing_agents: List of task pricing agents
        resource_weighting_agents: List of resource weighting agents
        multi_env_training: Whether to use multi-environment training
        total_episodes: Number of training episodes
        eval_freq: The evaluation frequency (in episodes)
    """
    single_env = OnlineFlexibleResourceAllocationEnv('./training/settings/basic.env')
    multi_env = OnlineFlexibleResourceAllocationEnv([
        './training/settings/basic.env',
        './training/settings/large_tasks_servers.env',
        './training/settings/limited_resources.env',
        './training/settings/mixture_tasks_servers.env'
    ])

    multi_envs_eval = generate_eval_envs(multi_env, 20, './training/settings/eval_envs/multi_env/')
    single_env_eval = generate_eval_envs(single_env, 5, './training/settings/eval_envs/single_env/')
    single_env_eval_writer = tf.summary.create_file_writer(f'training/results/logs/{folder}_single_env_{datetime}')

    # Loop over the episodes
    for episode in range(total_episodes):
        if episode % 5 == 0:
            print(f'Episode: {episode} at {dt.datetime.now().strftime("%H:%M:%S")}')
        with primary_writer.as_default():
            if multi_env_training:
                train_agent(multi_env, task_pricing_agents, resource_weighting_agents)
            else:
                train_agent(single_env, task_pricing_agents, resource_weighting_agents)

        # Every eval_freq episodes, evaluate the agents
        if episode % eval_freq == 0:
            with primary_writer.as_default():
                eval_agent(multi_envs_eval, episode, task_pricing_agents, resource_weighting_agents)
            with single_env_eval_writer.as_default():
                eval_agent(single_env_eval, episode, task_pricing_agents, resource_weighting_agents)

    for agent in task_pricing_agents:
        agent.save()
    for agent in resource_weighting_agents:
        agent.save()
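
A minimal driver for multi_env_single_env_training above, sketched under the assumption that setup_tensorboard and the DQN constructors from the other examples on this page are available (the folder name and agent IDs are illustrative):

if __name__ == "__main__":
    # Hypothetical folder name; setup_tensorboard returns a writer and datetime stamp (see Example #7)
    folder = 'multi_env_agents'
    primary_writer, datetime = setup_tensorboard('training/results/logs/', folder)

    # Single-agent lists mirroring the constructors in Example #2
    task_pricing_agents = [TaskPricingDqnAgent(0, create_rnn_dqn_network(9, 5))]
    resource_weighting_agents = [ResourceWeightingDqnAgent(1, create_rnn_dqn_network(16, 5))]

    multi_env_single_env_training(folder, datetime, primary_writer,
                                  task_pricing_agents, resource_weighting_agents)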

Example #2
def test_train_agents():
    print()
    setup_tensorboard('training/tmp/', 'train_agents')

    env = OnlineFlexibleResourceAllocationEnv('training/settings/basic.env')

    pricing_agents = [
        TaskPricingDqnAgent(0,
                            create_rnn_dqn_network(9, 5),
                            batch_size=16,
                            initial_training_replay_size=16,
                            training_freq=100),
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(2,
                                  create_rnn_dqn_network(16, 5),
                                  batch_size=16,
                                  initial_training_replay_size=16,
                                  training_freq=100)
    ]

    for _ in range(2):
        train_agent(env, pricing_agents, weighting_agents)

    # noinspection PyTypeChecker
    agents: List[ReinforcementLearningAgent] = pricing_agents + weighting_agents
    for agent in agents:
        assert 0 < agent.total_updates

Example #3
def test_env_load_settings():
    env = OnlineFlexibleResourceAllocationEnv('env/settings/basic.env')
    env_state = env.reset()

    for server, tasks in env_state.server_tasks.items():
        server.assert_valid()
        for task in tasks:
            task.assert_valid()

    for task in env._unallocated_tasks:
        task.assert_valid()

Example #4
def test_env_save_load():
    # Create the environment and reset it to get the initial state
    env = OnlineFlexibleResourceAllocationEnv('env/settings/basic.env')
    state = env.reset()

    random_task_pricing = RandomTaskPricingAgent(0)
    random_resource_weighting = RandomResourceWeightingAgent(0)

    # Take 40 random steps so the saved environment has non-trivial state
    for _ in range(40):
        if state.auction_task is not None:
            actions = {
                server: random_task_pricing.bid(state.auction_task, tasks, server, state.time_step)
                for server, tasks in state.server_tasks.items()
            }
        else:
            actions = {
                server: random_resource_weighting.weight(tasks, server, state.time_step)
                for server, tasks in state.server_tasks.items()
            }
        state, rewards, done, info = env.step(actions)

    # Save the environment, then reload it from file
    env.save_env('env/settings/tmp/save.env')
    loaded_env, loaded_env_state = env.load_env('env/settings/tmp/save.env')

    # Check that the auction task, the unallocated tasks and the per-server tasks survive the round trip
    assert state.auction_task == loaded_env_state.auction_task
    assert len(env._unallocated_tasks) == len(loaded_env._unallocated_tasks)
    for task, loaded_task in zip(env._unallocated_tasks, loaded_env._unallocated_tasks):
        assert task == loaded_task
    for server, tasks in state.server_tasks.items():
        loaded_server, loaded_tasks = next(((loaded_server, loaded_tasks)
                                            for loaded_server, loaded_tasks in loaded_env_state.server_tasks.items()
                                            if loaded_server.name == server.name), (None, None))
        assert loaded_server is not None and loaded_tasks is not None
        assert server.name == loaded_server.name and server.storage_cap == loaded_server.storage_cap and \
            server.computational_cap == loaded_server.computational_cap and \
            server.bandwidth_cap == loaded_server.bandwidth_cap
        for task, loaded_task in zip(tasks, loaded_tasks):
            assert task.name == loaded_task.name and task.required_storage == loaded_task.required_storage and \
                task.required_computation == loaded_task.required_computation and \
                task.required_results_data == loaded_task.required_results_data and \
                task.auction_time == loaded_task.auction_time and task.deadline == loaded_task.deadline and \
                task.stage is loaded_task.stage and task.loading_progress == loaded_task.loading_progress and \
                task.compute_progress == loaded_task.compute_progress and \
                task.sending_progress == loaded_task.sending_progress and task.price == loaded_task.price
            task.assert_valid()

    # Saving the loaded environment again should produce an identical file
    loaded_env.save_env('env/settings/tmp/loaded_save.env')
    with open('env/settings/tmp/save.env') as env_file:
        env_file_data = env_file.read()
    with open('env/settings/tmp/loaded_save.env') as loaded_env_file:
        loaded_env_file_data = loaded_env_file.read()
    assert env_file_data == loaded_env_file_data
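
A minimal save/load round trip distilled from the test above, assuming the same environment API (the tmp file path is illustrative):

env = OnlineFlexibleResourceAllocationEnv('env/settings/basic.env')
env.reset()
env.save_env('env/settings/tmp/round_trip.env')  # serialise the current environment
loaded_env, loaded_state = env.load_env('env/settings/tmp/round_trip.env')  # restore the env and its state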

Example #5
def test_train_rnd_agents():
    print()
    setup_tensorboard('training/tmp/', 'train_rnd_agents')
    env = OnlineFlexibleResourceAllocationEnv([
        '../src/training/settings/basic.env',
        '../src/training/settings/large_tasks_servers.env',
        '../src/training/settings/mixture_tasks_servers.env',
        '../src/training/settings/limited_resources.env',
    ])

    pricing_agents = [RandomTaskPricingRLAgent(0)]
    weighting_agents = [RandomResourceWeightingRLAgent(0)]

    for _ in range(10):
        train_agent(env, pricing_agents, weighting_agents)

Example #6
def test_agent_evaluation():
    print()
    setup_tensorboard('training/results/tmp/', 'agent_eval')

    env = OnlineFlexibleResourceAllocationEnv('training/settings/basic.env')

    eval_envs = generate_eval_envs(env,
                                   5,
                                   'training/settings/tmp/',
                                   overwrite=True)
    assert len(os.listdir('training/settings/tmp/')) == 5
    total_auctions, total_resource_allocation = 0, 0
    for eval_env in eval_envs:
        env, state = OnlineFlexibleResourceAllocationEnv.load_env(eval_env)
        total_auctions += len(env._unallocated_tasks) + (
            1 if state.auction_task is not None else 0)
        total_resource_allocation += env._total_time_steps + 1

    pricing_agents = [
        TaskPricingDqnAgent(0, create_bidirectional_dqn_network(9, 5)),
        TaskPricingDdpgAgent(1, create_lstm_actor_network(9),
                             create_lstm_critic_network(9))
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(2, create_bidirectional_dqn_network(16, 5)),
        ResourceWeightingDdpgAgent(3, create_lstm_actor_network(16),
                                   create_lstm_critic_network(16)),
    ]

    results = eval_agent(eval_envs, 0, pricing_agents, weighting_agents)
    print(
        f'Results - Total prices: {results.total_prices}, Number of completed tasks: {results.num_completed_tasks}, '
        f'failed tasks: {results.num_failed_tasks}, winning prices: {results.winning_prices}, '
        f'Number of auctions: {results.num_auctions}, resource allocations: {results.num_resource_allocations}'
    )
    assert 0 < results.num_completed_tasks
    assert 0 < results.num_failed_tasks

    assert results.num_auctions == total_auctions
    assert results.num_resource_allocations == total_resource_allocation

Example #7
from agents.rl_agents.agents.dqn import ResourceWeightingDqnAgent, TaskPricingDqnAgent
from agents.rl_agents.neural_networks.dqn_networks import create_bidirectional_dqn_network
from env.environment import OnlineFlexibleResourceAllocationEnv
from training.train_agents import generate_eval_envs, run_training, setup_tensorboard

if __name__ == "__main__":
    # Setup tensorboard
    folder = 'bidirectional_agents'
    writer, datetime = setup_tensorboard('training/results/logs/', folder)

    save_folder = f'{folder}_{datetime}'

    env = OnlineFlexibleResourceAllocationEnv([
        './training/settings/basic.env',
        './training/settings/large_tasks_servers.env',
        './training/settings/limited_resources.env',
        './training/settings/mixture_tasks_servers.env'
    ])
    eval_envs = generate_eval_envs(
        env, 20, './training/settings/eval_envs/network_arch/')

    task_pricing_agents = [
        TaskPricingDqnAgent(agent_num,
                            create_bidirectional_dqn_network(9, 21),
                            save_folder=save_folder) for agent_num in range(3)
    ]
    resource_weighting_agents = [
        ResourceWeightingDqnAgent(0,
                                  create_bidirectional_dqn_network(16, 11),
                                  save_folder=save_folder)
    ]
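
    # The scraped example ends before training is launched, although run_training is imported above.
    # A hypothetical continuation: the argument list below is an assumption, not a confirmed signature.
    run_training(env, eval_envs, total_episodes=600,
                 task_pricing_agents=task_pricing_agents,
                 resource_weighting_agents=resource_weighting_agents)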

Example #8
def test_env_step_rnd_action():
    """
    Tests that the environment works with random actions
    """
    print()

    # Generate the environment
    env = OnlineFlexibleResourceAllocationEnv([
        '../src/training/settings/basic.env',
        '../src/training/settings/large_tasks_servers.env',
        '../src/training/settings/mixture_tasks_servers.env',
        '../src/training/settings/limited_resources.env',
    ])

    # Random action agents
    random_task_pricing = RandomTaskPricingAgent(0)
    random_resource_weighting = RandomResourceWeightingAgent(0)

    # Run the environment multiple times
    for _ in tqdm(range(200)):
        state = env.reset()

        # Number of auction opportunities
        num_auction_opportunities = len(
            env._unallocated_tasks) + (1 if state.auction_task else 0)
        # Number of auction and resource allocation steps taken
        num_auctions, num_resource_allocations = 0, 0
        # Number of environment servers
        num_servers = len(state.server_tasks)

        # Take random steps over the environment
        done = False
        while not done:
            # Check that the number of servers is constant
            assert len(state.server_tasks) == num_servers

            # Generate the actions
            if state.auction_task:
                actions: Dict[Server, float] = {
                    server: random_task_pricing.bid(state.auction_task,
                                                    allocated_tasks, server,
                                                    state.time_step)
                    for server, allocated_tasks in state.server_tasks.items()
                }
                num_auctions += 1
            else:
                actions: Dict[Server, Dict[Task, float]] = {
                    server:
                    random_resource_weighting.weight(tasks, server,
                                                     state.time_step)
                    for server, tasks in state.server_tasks.items()
                }
                num_resource_allocations += 1

            # Take the action on the environment
            state, reward, done, info = env.step(actions)
            assert all(task.auction_time <= state.time_step <= task.deadline
                       for tasks in state.server_tasks.values()
                       for task in tasks)
            for server, tasks in state.server_tasks.items():
                server.assert_valid()
                for task in tasks:
                    task.assert_valid()

        # Check that the number of auction and resource allocation steps are correct
        assert state.auction_task is None
        assert len(env._unallocated_tasks) == 0
        assert num_auctions == num_auction_opportunities
        assert num_resource_allocations == env._total_time_steps + 1

Example #9
    )

    resource_weighting_agents = [
        ResourceWeightingDqnAgent(0, create_lstm_dqn_network(16, 11))
    ]
    resource_weighting_agents[0].model_network.load_weights(
        './analysis/fixed_heuristics/eval_agents/Resource_weighting_Dqn_agent_0/update_440898'
    )

    return task_pricing_agents, resource_weighting_agents


if __name__ == "__main__":
    eval_env = OnlineFlexibleResourceAllocationEnv([
        './analysis/fixed_heuristics/settings/basic.env',
        './analysis/fixed_heuristics/settings/large_tasks_servers.env',
        './analysis/fixed_heuristics/settings/limited_resources.env',
        './analysis/fixed_heuristics/settings/mixture_tasks_servers.env'
    ])

    eval_pos = 0
    while True:
        if not os.path.exists(
                f'./analysis/fixed_heuristics/eval_envs_{eval_pos}/'):
            eval_envs = generate_eval_envs(
                eval_env, 20,
                f'./analysis/fixed_heuristics/eval_envs_{eval_pos}/')
            break
        else:
            eval_pos += 1

    task_pricing_agents, resource_weighting_agents = load_agents()