def generate_eval_envs(eval_env: OnlineFlexibleResourceAllocationEnv, num_evals: int, folder: str,
                       overwrite: bool = False) -> List[str]:
    """
    Generates and saves the evaluation environments used for evaluating the training of agents

    Args:
        eval_env: The evaluation environment used to generate the files
        num_evals: The number of environments to generate
        folder: The folder where the environments are saved
        overwrite: Whether to overwrite previously saved environments

    Returns:
        A list of environment file paths
    """
    if not os.path.exists(folder):
        os.makedirs(folder)

    eval_files = []
    for eval_num in range(num_evals):
        eval_file = f'{folder}/eval_{eval_num}.env'
        eval_files.append(eval_file)
        if overwrite or not os.path.exists(eval_file):
            eval_env.reset()
            eval_env.save_env(eval_file)

    return eval_files
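# A minimal usage sketch for generate_eval_envs (a hypothetical example, not part of the original
# module): it assumes the 'env/settings/basic.env' settings file referenced by the tests below,
# and the output folder name is illustrative.
def _example_generate_eval_envs():
    eval_env = OnlineFlexibleResourceAllocationEnv('env/settings/basic.env')
    # Generate 20 evaluation environments, reusing any that already exist on disk
    eval_files = generate_eval_envs(eval_env, num_evals=20, folder='env/eval_envs')
    assert len(eval_files) == 20
    assert all(os.path.exists(eval_file) for eval_file in eval_files)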
def test_env_load_settings():
    """ Tests that an environment generated from a settings file produces valid servers and tasks """
    env = OnlineFlexibleResourceAllocationEnv('env/settings/basic.env')
    env_state = env.reset()

    # Check that every server and its allocated tasks are valid
    for server, tasks in env_state.server_tasks.items():
        server.assert_valid()
        for task in tasks:
            task.assert_valid()

    # Check that all of the tasks yet to be auctioned are valid
    for task in env._unallocated_tasks:
        task.assert_valid()
def test_env_save_load():
    """ Tests that saving then loading an environment reproduces the same environment state """
    # Generate an environment and advance it with random actions so the saved state is non-trivial
    env = OnlineFlexibleResourceAllocationEnv('env/settings/basic.env')
    state = env.reset()

    random_task_pricing = RandomTaskPricingAgent(0)
    random_resource_weighting = RandomResourceWeightingAgent(0)
    for _ in range(40):
        if state.auction_task is not None:
            actions = {
                server: random_task_pricing.bid(state.auction_task, tasks, server, state.time_step)
                for server, tasks in state.server_tasks.items()
            }
        else:
            actions = {
                server: random_resource_weighting.weight(tasks, server, state.time_step)
                for server, tasks in state.server_tasks.items()
            }
        state, rewards, done, info = env.step(actions)

    # Save the environment then load it again
    env.save_env('env/settings/tmp/save.env')
    loaded_env, loaded_env_state = env.load_env('env/settings/tmp/save.env')

    # Check that the auction task and the unallocated tasks are identical
    assert state.auction_task == loaded_env_state.auction_task
    assert len(env._unallocated_tasks) == len(loaded_env._unallocated_tasks)
    for task, loaded_task in zip(env._unallocated_tasks, loaded_env._unallocated_tasks):
        assert task == loaded_task

    # Check that each server and its allocated tasks were loaded with identical attributes
    for server, tasks in state.server_tasks.items():
        # Find the matching server in the loaded state by name
        loaded_server, loaded_tasks = next(((loaded_server, loaded_tasks)
                                            for loaded_server, loaded_tasks in loaded_env_state.server_tasks.items()
                                            if loaded_server.name == server.name), (None, None))
        assert loaded_server is not None and loaded_tasks is not None
        assert server.name == loaded_server.name and server.storage_cap == loaded_server.storage_cap and \
            server.computational_cap == loaded_server.computational_cap and \
            server.bandwidth_cap == loaded_server.bandwidth_cap

        for task, loaded_task in zip(tasks, loaded_tasks):
            assert task.name == loaded_task.name and task.required_storage == loaded_task.required_storage and \
                task.required_computation == loaded_task.required_computation and \
                task.required_results_data == loaded_task.required_results_data and \
                task.auction_time == loaded_task.auction_time and task.deadline == loaded_task.deadline and \
                task.stage is loaded_task.stage and task.loading_progress == loaded_task.loading_progress and \
                task.compute_progress == loaded_task.compute_progress and \
                task.sending_progress == loaded_task.sending_progress and task.price == loaded_task.price
            task.assert_valid()

    # Saving the loaded environment again must produce a byte-identical file
    loaded_env.save_env('env/settings/tmp/loaded_save.env')
    with open('env/settings/tmp/save.env') as env_file:
        env_file_data = env_file.read()
    with open('env/settings/tmp/loaded_save.env') as loaded_env_file:
        loaded_env_file_data = loaded_env_file.read()
    assert env_file_data == loaded_env_file_data
def train_agent(training_env: OnlineFlexibleResourceAllocationEnv, pricing_agents: List[TaskPricingRLAgent],
                weighting_agents: List[ResourceWeightingRLAgent]):
    """
    Trains reinforcement learning agents through the provided environment

    Args:
        training_env: The training environment used
        pricing_agents: A list of reinforcement learning task pricing agents
        weighting_agents: A list of reinforcement learning resource weighting agents
    """
    # Reset the environment, getting a new training environment for this episode
    state = training_env.reset()

    # Allocate each server a random task pricing and resource weighting agent
    server_pricing_agents: Dict[Server, TaskPricingRLAgent] = {
        server: rnd.choice(pricing_agents) for server in state.server_tasks.keys()
    }
    server_weighting_agents: Dict[Server, ResourceWeightingRLAgent] = {
        server: rnd.choice(weighting_agents) for server in state.server_tasks.keys()
    }

    # Each server's most recent auction observation: the agent state for the auction
    # (auction task, server tasks, server, time step), the action taken and whether the auction
    # was won; None until the server has seen its first auction
    server_auction_states: Dict[Server, Optional[Tuple[TaskPricingState, float, bool]]] = {
        server: None for server in state.server_tasks.keys()
    }
    # For successful auctions, the agent state of the winning bid, the action taken and the following
    # agent state are stored so the observation can be added to the agent once the task finishes,
    # when it is known whether the task was completed or failed
    successful_auction_states: List[Tuple[TaskPricingState, float, TaskPricingState]] = []

    # Loop until the environment is done (the current time step > the environment's total time steps)
    done = False
    while not done:
        # If the state has a task to be auctioned then get each server's price as its action
        if state.auction_task:
            # Get the bids for each server
            auction_prices = {
                server: server_pricing_agents[server].bid(state.auction_task, tasks, server,
                                                          state.time_step, training=True)
                for server, tasks in state.server_tasks.items()
            }

            # Environment step using the pricing actions to get the next state, rewards, done and info
            next_state, rewards, done, info = training_env.step(auction_prices)

            # Update the server auction states and the successful auction states with the new info
            for server, tasks in state.server_tasks.items():
                # Generate the server's current agent state
                current_state = TaskPricingState(state.auction_task, tasks, server, state.time_step)

                if server_auction_states[server]:  # If a previous auction observation exists
                    # Get the previous agent state, action and whether the server won that auction
                    previous_state, previous_action, is_previous_auction_win = server_auction_states[server]
                    if is_previous_auction_win:
                        # The server won the previous auction so store the observation until the task finishes
                        successful_auction_states.append((previous_state, previous_action, current_state))
                    else:
                        # Otherwise add the observation to the agent as a failed auction bid
                        server_pricing_agents[server].failed_auction_bid(previous_state, previous_action,
                                                                         current_state)

                # Update the server's auction state with the current agent state
                server_auction_states[server] = (current_state, auction_prices[server], server in rewards)
        else:  # Else the environment is at the resource allocation stage
            # For each server, calculate the relative weighting of each of its tasks
            weighting_actions: Dict[Server, Dict[Task, float]] = {
                server: server_weighting_agents[server].weight(tasks, server, state.time_step, training=True)
                for server, tasks in state.server_tasks.items()
            }

            # Environment step using the weighting actions to get the next state, finished tasks, done and info
            next_state, finished_server_tasks, done, info = training_env.step(weighting_actions)

            # Tasks may have finished due to the resource allocation, so add the stored auction
            # observations for the finished tasks to the relevant task pricing agents
            for server, finished_tasks in finished_server_tasks.items():
                for finished_task in finished_tasks:
                    # Find the stored successful auction agent state for the finished task
                    successful_auction = next((auction_agent_state for auction_agent_state in successful_auction_states
                                               if auction_agent_state[0].auction_task == finished_task), None)
                    if successful_auction is None:
                        # The finished task has no stored auction observation; print debug info
                        print(f'Number of successful auction agent states: {len(successful_auction_states)}')
                        print(f'Number of server tasks: {sum(len(tasks) for tasks in next_state.server_tasks.values())}')
                        print(f'Finished task: {str(finished_task)}\n\n')
                        print(f'State: {str(state)}\n')
                        print(f'Next state: {str(next_state)}')
                        break

                    # Remove the successful auction agent state and unwrap the tuple
                    successful_auction_states.remove(successful_auction)
                    auction_state, action, next_auction_state = successful_auction

                    # Add the winning auction bid observation to the agent
                    server_pricing_agents[server].winning_auction_bid(auction_state, action, finished_task,
                                                                      next_auction_state)

            # Add the resource allocation observations to the weighting agents
            for server, tasks in state.server_tasks.items():
                agent_state = ResourceAllocationState(tasks, server, state.time_step)
                next_agent_state = ResourceAllocationState(next_state.server_tasks[server], server,
                                                           next_state.time_step)
                server_weighting_agents[server].resource_allocation_obs(agent_state, weighting_actions[server],
                                                                        next_agent_state,
                                                                        finished_server_tasks[server])

        # Check that every allocated task is within its auction time and deadline
        assert all(task.auction_time <= next_state.time_step <= task.deadline
                   for _, tasks in next_state.server_tasks.items() for task in tasks)

        # Update the state with the next state
        state = next_state
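# A minimal training-driver sketch (hypothetical, not part of the original module) showing how
# generate_eval_envs and train_agent could be combined; it assumes already-constructed lists of
# TaskPricingRLAgent and ResourceWeightingRLAgent instances, and the episode count, settings file
# and evaluation folder are illustrative.
def _example_training_loop(pricing_agents: List[TaskPricingRLAgent],
                           weighting_agents: List[ResourceWeightingRLAgent]):
    training_env = OnlineFlexibleResourceAllocationEnv('env/settings/basic.env')
    # Fixed evaluation environments so agents can be compared on identical episodes
    # (running the evaluation itself is outside the scope of this sketch)
    eval_files = generate_eval_envs(training_env, num_evals=10, folder='env/eval_envs')

    for episode in range(500):
        # Each call runs a single episode, sampling an agent per server and adding
        # auction and resource allocation observations to the agents as it goes
        train_agent(training_env, pricing_agents, weighting_agents)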
def test_env_step_rnd_action():
    """ Tests that the environment works with random actions """
    print()
    # Generate the environment
    env = OnlineFlexibleResourceAllocationEnv([
        '../src/training/settings/basic.env',
        '../src/training/settings/large_tasks_servers.env',
        '../src/training/settings/mixture_tasks_servers.env',
        '../src/training/settings/limited_resources.env',
    ])

    # Random action agents
    random_task_pricing = RandomTaskPricingAgent(0)
    random_resource_weighting = RandomResourceWeightingAgent(0)

    # Run the environment multiple times
    for _ in tqdm(range(200)):
        state = env.reset()

        # The number of auction opportunities
        num_auction_opportunities = len(env._unallocated_tasks) + (1 if state.auction_task else 0)
        # The number of auction and resource allocation steps taken
        num_auctions, num_resource_allocations = 0, 0
        # The number of environment servers
        num_servers = len(state.server_tasks)

        # Take random steps over the environment
        done = False
        while not done:
            # Check that the number of servers is constant
            assert len(state.server_tasks) == num_servers

            # Generate the actions
            if state.auction_task:
                actions: Dict[Server, float] = {
                    server: random_task_pricing.bid(state.auction_task, allocated_tasks, server, state.time_step)
                    for server, allocated_tasks in state.server_tasks.items()
                }
                num_auctions += 1
            else:
                actions: Dict[Server, Dict[Task, float]] = {
                    server: random_resource_weighting.weight(tasks, server, state.time_step)
                    for server, tasks in state.server_tasks.items()
                }
                num_resource_allocations += 1

            # Take the action on the environment
            state, reward, done, info = env.step(actions)

            # Check that all tasks are within their auction time and deadline,
            # and that all servers and tasks are valid
            assert all(task.auction_time <= state.time_step <= task.deadline
                       for _, tasks in state.server_tasks.items() for task in tasks)
            for server, tasks in state.server_tasks.items():
                server.assert_valid()
                for task in tasks:
                    task.assert_valid()

        # Check that the number of auction and resource allocation steps are correct
        assert state.auction_task is None
        assert len(env._unallocated_tasks) == 0
        assert num_auctions == num_auction_opportunities
        assert num_resource_allocations == env._total_time_steps + 1