def test_saving_agent():
    print()
    # Different agents
    dqn_agent = TaskPricingDqnAgent(0, create_lstm_dqn_network(9, 10), save_folder='tmp')
    ddpg_agent = TaskPricingDdpgAgent(1, create_lstm_actor_network(9), create_lstm_critic_network(9),
                                      save_folder='tmp')
    td3_agent = TaskPricingTD3Agent(2, create_lstm_actor_network(9), create_lstm_critic_network(9),
                                    create_lstm_critic_network(9), save_folder='tmp')

    # Save the agents
    dqn_agent.save('agent/checkpoints/')
    ddpg_agent.save('agent/checkpoints/')
    td3_agent.save('agent/checkpoints/')

    # Check that loading works
    loaded_model = create_lstm_dqn_network(9, 10)
    loaded_model.load_weights('agent/checkpoints/tmp/Task_pricing_Dqn_agent_0/update_0')
    assert all(tf.reduce_all(weights == load_weights)
               for weights, load_weights in zip(dqn_agent.model_network.variables, loaded_model.variables))
def test_train_agents():
    print()
    setup_tensorboard('training/tmp/', 'train_agents')

    env = OnlineFlexibleResourceAllocationEnv('training/settings/basic.env')
    pricing_agents = [
        TaskPricingDqnAgent(0, create_rnn_dqn_network(9, 5), batch_size=16,
                            initial_training_replay_size=16, training_freq=100),
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(2, create_rnn_dqn_network(16, 5), batch_size=16,
                                  initial_training_replay_size=16, training_freq=100)
    ]

    for _ in range(2):
        train_agent(env, pricing_agents, weighting_agents)

    # noinspection PyTypeChecker
    agents: List[ReinforcementLearningAgent] = pricing_agents + weighting_agents
    for agent in agents:
        assert 0 < agent.total_updates
def test_agent_actions():
    print()
    pricing_agents = [
        TaskPricingDqnAgent(0, create_lstm_dqn_network(9, 5)),
        TaskPricingDdqnAgent(1, create_lstm_dqn_network(9, 5)),
        TaskPricingDuelingDqnAgent(2, create_lstm_dueling_dqn_network(9, 5)),
        TaskPricingCategoricalDqnAgent(3, create_lstm_categorical_dqn_network(9, 5)),
        TaskPricingDdpgAgent(4, create_lstm_actor_network(9), create_lstm_critic_network(9)),
        TaskPricingTD3Agent(5, create_lstm_actor_network(9), create_lstm_critic_network(9),
                            create_lstm_critic_network(9))
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(0, create_lstm_dqn_network(16, 5)),
        ResourceWeightingDdqnAgent(1, create_lstm_dqn_network(16, 5)),
        ResourceWeightingDuelingDqnAgent(2, create_lstm_dueling_dqn_network(16, 5)),
        ResourceWeightingCategoricalDqnAgent(3, create_lstm_categorical_dqn_network(16, 5)),
        ResourceWeightingDdpgAgent(4, create_lstm_actor_network(16), create_lstm_critic_network(16)),
        ResourceWeightingTD3Agent(5, create_lstm_actor_network(16), create_lstm_critic_network(16),
                                  create_lstm_critic_network(16))
    ]

    env, state = OnlineFlexibleResourceAllocationEnv.load_env('agent/settings/actions.env')

    for agent in pricing_agents:
        actions = {
            server: agent.bid(state.auction_task, tasks, server, state.time_step)
            for server, tasks in state.server_tasks.items()
        }

    # noinspection PyUnboundLocalVariable
    print(f'Actions: {{{", ".join([f"{server.name}: {action}" for server, action in actions.items()])}}}')
    state, rewards, done, _ = env.step(actions)

    for agent in weighting_agents:
        actions = {
            server: agent.weight(tasks, server, state.time_step)
            for server, tasks in state.server_tasks.items()
        }

    print(f'Actions: {{{", ".join([f"{server.name}: {list(task_action.values())}" for server, task_action in actions.items()])}}}')
    state, rewards, done, _ = env.step(actions)
def test_agent_evaluation():
    print()
    setup_tensorboard('training/results/tmp/', 'agent_eval')

    env = OnlineFlexibleResourceAllocationEnv('training/settings/basic.env')
    eval_envs = generate_eval_envs(env, 5, 'training/settings/tmp/', overwrite=True)
    assert len(os.listdir('training/settings/tmp/')) == 5

    total_auctions, total_resource_allocation = 0, 0
    for eval_env in eval_envs:
        env, state = OnlineFlexibleResourceAllocationEnv.load_env(eval_env)
        total_auctions += len(env._unallocated_tasks) + (1 if state.auction_task is not None else 0)
        total_resource_allocation += env._total_time_steps + 1

    pricing_agents = [
        TaskPricingDqnAgent(0, create_bidirectional_dqn_network(9, 5)),
        TaskPricingDdpgAgent(1, create_lstm_actor_network(9), create_lstm_critic_network(9))
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(2, create_bidirectional_dqn_network(16, 5)),
        ResourceWeightingDdpgAgent(3, create_lstm_actor_network(16), create_lstm_critic_network(16)),
    ]

    results = eval_agent(eval_envs, 0, pricing_agents, weighting_agents)
    print(f'Results - Total prices: {results.total_prices}, Number of completed tasks: {results.num_completed_tasks}, '
          f'failed tasks: {results.num_failed_tasks}, winning prices: {results.winning_prices}, '
          f'Number of auctions: {results.num_auctions}, resource allocations: {results.num_resource_allocations}')
    assert 0 < results.num_completed_tasks
    assert 0 < results.num_failed_tasks
    assert results.num_auctions == total_auctions
    assert results.num_resource_allocations == total_resource_allocation
def load_agents():
    task_pricing_agents = [
        TaskPricingDqnAgent(agent_num, create_lstm_dqn_network(9, 21))
        for agent_num in range(3)
    ]
    task_pricing_agents[0].model_network.load_weights(
        './analysis/fixed_heuristics/eval_agents/Task_pricing_Dqn_agent_0/update_80922')
    task_pricing_agents[1].model_network.load_weights(
        './analysis/fixed_heuristics/eval_agents/Task_pricing_Dqn_agent_1/update_86909')
    task_pricing_agents[2].model_network.load_weights(
        './analysis/fixed_heuristics/eval_agents/Task_pricing_Dqn_agent_2/update_88937')

    resource_weighting_agents = [
        ResourceWeightingDqnAgent(0, create_lstm_dqn_network(16, 11))
    ]
    resource_weighting_agents[0].model_network.load_weights(
        './analysis/fixed_heuristics/eval_agents/Resource_weighting_Dqn_agent_0/update_440898')

    return task_pricing_agents, resource_weighting_agents
writer, datetime = setup_tensorboard('training/results/logs/', folder)
save_folder = f'{folder}_{datetime}'

env = OnlineFlexibleResourceAllocationEnv([
    './training/settings/basic.env',
    './training/settings/large_tasks_servers.env',
    './training/settings/limited_resources.env',
    './training/settings/mixture_tasks_servers.env'
])
eval_envs = generate_eval_envs(env, 20, './training/settings/eval_envs/network_arch/')

task_pricing_agents = [
    TaskPricingDqnAgent(agent_num, create_bidirectional_dqn_network(9, 21), save_folder=save_folder)
    for agent_num in range(3)
]
resource_weighting_agents = [
    ResourceWeightingDqnAgent(0, create_bidirectional_dqn_network(16, 11), save_folder=save_folder)
]

# Train the agents
with writer.as_default():
    run_training(env, eval_envs, 600, task_pricing_agents, resource_weighting_agents, 10)

for agent in task_pricing_agents:
    agent.save()
writer, datetime = setup_tensorboard('training/results/logs/', folder)
save_folder = f'{folder}_{datetime}'

env = OnlineFlexibleResourceAllocationEnv([
    './training/settings/basic.env',
    './training/settings/large_tasks_servers.env',
    './training/settings/limited_resources.env',
    './training/settings/mixture_tasks_servers.env'
])
eval_envs = generate_eval_envs(env, 20, './training/settings/eval_envs/network_arch/')

task_pricing_agents = [
    TaskPricingDqnAgent(agent_num, create_lstm_dqn_network(9, 21), save_folder=save_folder)
    for agent_num in range(3)
]
resource_weighting_agents = [
    ResourceWeightingSeq2SeqAgent(0, create_seq2seq_actor_network(), create_seq2seq_critic_network(),
                                  create_seq2seq_critic_network(), save_folder=save_folder)
]

with writer.as_default():
    run_training(env, eval_envs, 600, task_pricing_agents, resource_weighting_agents, 10)

for agent in task_pricing_agents:
    agent.save()
def test_networks():
    print()
    # Environment setup
    auction_task = Task('Test 4', 69.0, 35.0, 10.0, 0, 12)
    tasks = [
        Task('Test 1', 76.0, 36.0, 16.0, 0, 12, TaskStage.LOADING, 50.0, price=1),
        Task('Test 2', 75.0, 37.0, 12.0, 0, 12, TaskStage.COMPUTING, 75.0, 10.0, price=1),
        Task('Test 3', 72.0, 47.0, 20.0, 0, 7, TaskStage.COMPUTING, 72.0, 25.0, price=1)
    ]
    server = Server('Test', 220.0, 35.0, 22.0)

    # Assert that the environment is valid
    auction_task.assert_valid()
    for task in tasks:
        task.assert_valid()
    server.assert_valid()

    # List of networks
    pricing_networks = [
        create_bidirectional_dqn_network(9, 3),
        create_lstm_dqn_network(9, 3),
        create_gru_dqn_network(9, 3),
        create_rnn_dqn_network(9, 3),
        create_lstm_dueling_dqn_network(9, 3),
        create_lstm_dueling_dqn_network(9, 3, combiner='max')
    ]
    weighting_networks = [
        create_bidirectional_dqn_network(16, 3),
        create_lstm_dqn_network(16, 3),
        create_gru_dqn_network(16, 3),
        create_rnn_dqn_network(16, 3),
        create_lstm_dueling_dqn_network(16, 3),
        create_lstm_dueling_dqn_network(16, 3, combiner='max')
    ]

    # Network observations
    auction_obs = tf.expand_dims(TaskPricingDqnAgent._network_obs(auction_task, tasks, server, 0), axis=0)
    resource_obs = tf.expand_dims(ResourceWeightingDqnAgent._network_obs(tasks[0], tasks, server, 0), axis=0)
    print(f'Auction obs: {auction_obs}')
    print(f'Resource allocation obs: {resource_obs}')

    # Loop over the networks to check that the output and output shape are correct
    for pricing_network, weighting_network in zip(pricing_networks, weighting_networks):
        auction_output = pricing_network(auction_obs)
        resource_output = weighting_network(resource_obs)
        print(f'Network: {pricing_network.name}'
              f'\n\tAuction: {auction_output} ({auction_output.shape})'
              f'\n\tResource allocation: {resource_output} ({resource_output.shape})')
        assert auction_output.shape == (1, 3)
        assert resource_output.shape == (1, 3)

    # Check the categorical DQN networks as they are a special case
    pricing_network = create_lstm_categorical_dqn_network(9, 3, num_atoms=10)
    weighting_network = create_lstm_categorical_dqn_network(16, 3, num_atoms=10)
    auction_output = pricing_network(auction_obs)
    resource_output = weighting_network(resource_obs)
    print(f'Network: {pricing_network.name}'
          f'\n\tAuction: {auction_output}'
          f'\n\tResource allocation: {resource_output}')
    assert auction_output.shape == (1, 3, 10)
    assert resource_output.shape == (1, 3, 10)

    # Check the DDPG networks as they are a special case
    pricing_actor_networks = [create_lstm_actor_network(9)]
    pricing_critic_networks = [create_lstm_critic_network(9)]
    weighting_actor_networks = [create_lstm_actor_network(16)]
    weighting_critic_networks = [create_lstm_critic_network(16)]

    for pricing_actor, pricing_critic, weighting_actor, weighting_critic in zip(
            pricing_actor_networks, pricing_critic_networks, weighting_actor_networks, weighting_critic_networks):
        auction_actor_output = pricing_actor(auction_obs)
        weighting_actor_output = weighting_actor(resource_obs)
        auction_critic_output = pricing_critic([auction_obs, auction_actor_output])
        weighting_critic_output = weighting_critic([resource_obs, weighting_actor_output])
        print(f'Network - actor: {pricing_actor.name}, critic: {pricing_critic.name}'
              f'\n\tAuction Actor: {auction_actor_output}, critic: {auction_critic_output}'
              f'\n\tWeighting Actor: {weighting_actor_output}, critic: {weighting_critic_output}')
        assert auction_actor_output.shape == (1, 1)
        assert weighting_actor_output.shape == (1, 1)
        assert auction_critic_output.shape == (1, 1)
        assert weighting_critic_output.shape == (1, 1)
def test_task_price_training():
    print()
    setup_tensorboard('/tmp/results/', 'price_training')
    # List of agents
    agents: List[TaskPricingRLAgent] = [
        TaskPricingDqnAgent(0, create_lstm_dqn_network(9, 10), batch_size=4, save_folder='tmp'),
        TaskPricingDdqnAgent(1, create_lstm_dqn_network(9, 10), batch_size=4, save_folder='tmp'),
        TaskPricingDuelingDqnAgent(2, create_lstm_dueling_dqn_network(9, 10), batch_size=4, save_folder='tmp'),
        TaskPricingCategoricalDqnAgent(3, create_lstm_categorical_dqn_network(9, 10), batch_size=4,
                                       save_folder='tmp'),
        TaskPricingDdpgAgent(4, create_lstm_actor_network(9), create_lstm_critic_network(9), batch_size=4,
                             save_folder='tmp'),
        TaskPricingTD3Agent(5, create_lstm_actor_network(9), create_lstm_critic_network(9),
                            create_lstm_critic_network(9), batch_size=4, save_folder='tmp')
    ]

    # Load the environment
    env, state = OnlineFlexibleResourceAllocationEnv.load_env('training/settings/auction.env')

    # Servers
    server_1, server_2 = list(state.server_tasks.keys())
    # Actions
    actions = {server_1: 1.0, server_2: 2.0}
    # Environment step
    next_state, reward, done, info = env.step(actions)

    # Server states
    server_1_state = TaskPricingState(state.auction_task, state.server_tasks[server_1], server_1, state.time_step)
    server_2_state = TaskPricingState(state.auction_task, state.server_tasks[server_2], server_2, state.time_step)
    # Next server states
    next_server_1_state = TaskPricingState(next_state.auction_task, next_state.server_tasks[server_1], server_1,
                                           next_state.time_step)
    next_server_2_state = TaskPricingState(next_state.auction_task, next_state.server_tasks[server_2], server_2,
                                           next_state.time_step)
    # Finished auction task
    finished_task = next(finished_task for finished_task in next_state.server_tasks[server_1]
                         if finished_task == state.auction_task)
    finished_task = finished_task._replace(stage=TaskStage.COMPLETED)
    failed_task = finished_task._replace(stage=TaskStage.FAILED)

    # Loop over the agents, add the observations and try training
    for agent in agents:
        agent.winning_auction_bid(server_1_state, actions[server_1], finished_task, next_server_1_state)
        agent.winning_auction_bid(server_1_state, actions[server_1], failed_task, next_server_1_state)
        agent.failed_auction_bid(server_2_state, actions[server_2], next_server_2_state)
        agent.failed_auction_bid(server_2_state, 0, next_server_2_state)

        agent.train()

    print(f'Rewards: {[trajectory[3] for trajectory in agents[0].replay_buffer]}')
Training of single agents with multiple environments
"""

from __future__ import annotations

from agents.rl_agents.agents.dqn import ResourceWeightingDqnAgent, TaskPricingDqnAgent
from agents.rl_agents.neural_networks.dqn_networks import create_lstm_dqn_network
from training.train_agents import setup_tensorboard, multi_env_single_env_training

if __name__ == "__main__":
    folder = 'single_agent_multi_envs'
    primary_writer, datetime = setup_tensorboard('training/results/logs/', folder)
    save_folder = f'{folder}_{datetime}'

    task_pricing_agents = [
        TaskPricingDqnAgent(0, create_lstm_dqn_network(9, 21), save_folder=save_folder)
    ]
    resource_weighting_agents = [
        ResourceWeightingDqnAgent(0, create_lstm_dqn_network(16, 11), save_folder=save_folder)
    ]

    multi_env_single_env_training(folder, datetime, primary_writer, task_pricing_agents, resource_weighting_agents)
def test_epsilon_policy():
    print()
    # Tests the epsilon policy by getting agent actions that should update the agent epsilon over time
    env, state = OnlineFlexibleResourceAllocationEnv.load_env('agent/settings/actions.env')

    # Number of epsilon steps for the agents
    epsilon_steps = 25

    # Agents that have a custom _get_action function
    pricing_agents = [
        TaskPricingDqnAgent(0, create_lstm_dqn_network(9, 5), epsilon_steps=epsilon_steps,
                            epsilon_update_freq=1, epsilon_log_freq=1),
        TaskPricingCategoricalDqnAgent(1, create_lstm_categorical_dqn_network(9, 5), epsilon_steps=epsilon_steps,
                                       epsilon_update_freq=1, epsilon_log_freq=1),
        TaskPricingDdpgAgent(2, create_lstm_actor_network(9), create_lstm_critic_network(9),
                             epsilon_steps=epsilon_steps, epsilon_update_freq=1, epsilon_log_freq=1)
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(0, create_lstm_dqn_network(16, 5), epsilon_steps=epsilon_steps,
                                  epsilon_update_freq=1, epsilon_log_freq=1),
        ResourceWeightingCategoricalDqnAgent(1, create_lstm_categorical_dqn_network(16, 5),
                                             epsilon_steps=epsilon_steps, epsilon_update_freq=1, epsilon_log_freq=1),
        ResourceWeightingDdpgAgent(2, create_lstm_actor_network(16), create_lstm_critic_network(16),
                                   epsilon_steps=epsilon_steps, epsilon_update_freq=1, epsilon_log_freq=1)
    ]

    # Generate a tf writer and generate actions that will update the epsilon values for both agent types
    writer = tf.summary.create_file_writer('agent/tmp/testing_epsilon')
    num_steps = 10
    with writer.as_default():
        for _ in range(num_steps):
            for agent in pricing_agents:
                actions = {
                    server: agent.bid(state.auction_task, tasks, server, state.time_step, training=True)
                    for server, tasks in state.server_tasks.items()
                }
            state, rewards, done, _ = env.step(actions)

        for _ in range(num_steps):
            for agent in weighting_agents:
                actions = {
                    server: agent.weight(tasks, server, state.time_step, training=True)
                    for server, tasks in state.server_tasks.items()
                }
            state, rewards, done, _ = env.step(actions)

    # Check that the resulting total actions are valid
    for agent in pricing_agents:
        print(f'Agent: {agent.name}')
        assert agent.total_actions == num_steps * 3
    for agent in weighting_agents:
        print(f'Agent: {agent.name}')
        assert agent.total_actions == num_steps * 3

    # Check that the agent epsilons are correct
    assert pricing_agents[0].final_epsilon == pricing_agents[0].epsilon and \
        pricing_agents[1].final_epsilon == pricing_agents[1].epsilon
    assert weighting_agents[0].final_epsilon == weighting_agents[0].epsilon and \
        weighting_agents[1].final_epsilon == weighting_agents[1].epsilon
    assert pricing_agents[2].final_epsilon_std == pricing_agents[2].epsilon_std
    assert weighting_agents[2].final_epsilon_std == weighting_agents[2].epsilon_std
def test_build_agent():
    def assert_args(test_agent, args):
        """
        Asserts that the proposed arguments have been assigned to the agent

        Args:
            test_agent: The test agent
            args: The arguments used on the agent
        """
        for arg_name, arg_value in args.items():
            assert getattr(test_agent, arg_name) == arg_value, \
                f'Attr: {arg_name}, correct value: {arg_value}, actual value: {getattr(test_agent, arg_name)}'

    # Check inheritance arguments
    reinforcement_learning_arguments = {
        'batch_size': 16,
        'error_loss_fn': tf.compat.v1.losses.mean_squared_error,
        'initial_training_replay_size': 1000,
        'training_freq': 2,
        'replay_buffer_length': 20000,
        'save_frequency': 12500,
        'save_folder': 'test',
        'discount_factor': 0.9
    }
    dqn_arguments = {
        'target_update_tau': 1.0,
        'target_update_frequency': 2500,
        'optimiser': tf.keras.optimizers.Adadelta(),
        'initial_epsilon': 0.5,
        'final_epsilon': 0.2,
        'epsilon_update_freq': 25,
        'epsilon_log_freq': 10,
    }
    ddpg_arguments = {
        'actor_optimiser': tf.keras.optimizers.Adadelta(),
        'critic_optimiser': tf.keras.optimizers.Adadelta(),
        'initial_epsilon_std': 0.8,
        'final_epsilon_std': 0.1,
        'epsilon_update_freq': 25,
        'epsilon_log_freq': 10,
        'min_value': -15.0,
        'max_value': 15
    }
    pricing_arguments = {'failed_auction_reward': -100, 'failed_multiplier': -100}
    weighting_arguments = {'other_task_discount': 0.2, 'success_reward': 1, 'failed_reward': -2}

    # DQN agent arguments ----------------------------------------------------------------------
    dqn_pricing_arguments = {**reinforcement_learning_arguments, **dqn_arguments, **pricing_arguments}
    dqn_weighting_arguments = {**reinforcement_learning_arguments, **dqn_arguments, **weighting_arguments}

    pricing_network = create_lstm_dqn_network(9, 10)
    categorical_pricing_network = create_lstm_categorical_dqn_network(9, 10)
    pricing_agents = [
        TaskPricingDqnAgent(0, pricing_network, **dqn_pricing_arguments),
        TaskPricingDdqnAgent(1, pricing_network, **dqn_pricing_arguments),
        TaskPricingDuelingDqnAgent(2, pricing_network, **dqn_pricing_arguments),
        TaskPricingCategoricalDqnAgent(3, categorical_pricing_network, **dqn_pricing_arguments)
    ]
    for agent in pricing_agents:
        print(f'Agent: {agent.name}')
        assert_args(agent, dqn_pricing_arguments)

    weighting_network = create_lstm_dqn_network(16, 10)
    categorical_weighting_network = create_lstm_categorical_dqn_network(16, 10)
    weighting_agents = [
        ResourceWeightingDqnAgent(0, weighting_network, **dqn_weighting_arguments),
        ResourceWeightingDdqnAgent(1, weighting_network, **dqn_weighting_arguments),
        ResourceWeightingDuelingDqnAgent(2, weighting_network, **dqn_weighting_arguments),
        ResourceWeightingCategoricalDqnAgent(3, categorical_weighting_network, **dqn_weighting_arguments)
    ]
    for agent in weighting_agents:
        print(f'Agent: {agent.name}')
        assert_args(agent, dqn_weighting_arguments)

    # PG agent arguments ----------------------------------------------------------------------------------
    ddpg_pricing_arguments = {**reinforcement_learning_arguments, **ddpg_arguments, **pricing_arguments}
    ddpg_weighting_arguments = {**reinforcement_learning_arguments, **ddpg_arguments, **weighting_arguments}

    pricing_agents = [
        TaskPricingDdpgAgent(3, create_lstm_actor_network(9), create_lstm_critic_network(9),
                             **ddpg_pricing_arguments),
        TaskPricingTD3Agent(4, create_lstm_actor_network(9), create_lstm_critic_network(9),
                            create_lstm_critic_network(9), **ddpg_pricing_arguments)
    ]
    for agent in pricing_agents:
        print(f'Agent: {agent.name}')
        assert_args(agent, ddpg_pricing_arguments)

    weighting_agents = [
        ResourceWeightingDdpgAgent(3, create_lstm_actor_network(16), create_lstm_critic_network(16),
                                   **ddpg_weighting_arguments),
        ResourceWeightingTD3Agent(4, create_lstm_actor_network(16), create_lstm_critic_network(16),
                                  create_lstm_critic_network(16), **ddpg_weighting_arguments)
    ]
    for agent in weighting_agents:
        print(f'Agent: {agent.name}')
        assert_args(agent, ddpg_weighting_arguments)