def __init__(self):
    self.environments = {}
    for e in range(0, c.numEnvironments):
        self.environments[e] = ENVIRONMENT()

def main():
    parser = argparse.ArgumentParser(description='PyTorch actor-critic example')
    parser.add_argument('--hidden_layer_size', type=int, default=128, metavar='N',
                        help='Hidden Layer Size (default: 128)')
    parser.add_argument('--a_param', type=float, default=1.0, metavar='G',
                        help='dynamics a_parameter')
    parser.add_argument('--b_param', type=float, default=5.0, metavar='G',
                        help='dynamics b_parameter')
    args = parser.parse_args()

    for i in range(1):
        naf_environment = ENVIRONMENT(args, i)
        naf_environment.run()

    print("Learning Process Finished")

def __init__(self):
    self.envs = {}
    for i in range(0, c.numEnvs):
        self.envs[i] = ENVIRONMENT(i)

def main(N1, N2, D, parameter, iteration=int(1e5)):
    # parameters pb2
    agent_list = [ALOHA_AGENT(D=D, arrival_rate=parameter[i], trans_prob=1 / N2)
                  for i in range(N2)]
    n1_list = [ALOHA_AGENT(D=D, arrival_rate=0.5, trans_prob=1 / (4 * N1))
               for _ in range(N1)]
    agent_list.extend(n1_list)

    # parameters ps2
    channels = list(parameter[N2:])
    n1_channels = [0.5 for _ in range(N1)]
    channels.extend(n1_channels)

    env = ENVIRONMENT(channels=channels, agent_list=agent_list)

    reward_list = []
    energy_list = []
    for t in tqdm(range(iteration)):
        reward, energy, observations = env.step(time=t)
        for i in range(N1 + N2):
            env.agent_list[i].update(observation=observations[i])
        reward_list.append(reward)
        energy_list.append(energy)

    throughput, power = np.mean(reward_list[-int(1e4):]), np.mean(energy_list[-int(1e4):])
    print('Throu = {}'.format(throughput))
    print('Energy = {}'.format(power))
    return throughput, power

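A minimal invocation sketch for the function above, assuming it runs in the same module as the definitions it references (ALOHA_AGENT, ENVIRONMENT, tqdm, np). The concrete values are hypothetical and only illustrate the expected layout of `parameter`: the first N2 entries are arrival rates (pb2), the remainder are the matching channel success probabilities (ps2).

# Hypothetical driver -- placeholder values, for illustration only.
if __name__ == "__main__":
    N1, N2, D = 2, 2, 2
    parameter = [0.4, 0.6,   # arrival rates pb2 for the N2 agents
                 0.8, 0.9]   # channel success probabilities ps2 for those agents
    main(N1, N2, D, parameter, iteration=int(1e5))
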
def upper_bound(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    # get LP agent policy
    LP_policy = multichainLP(D=D, D_=D_, pb1=pb1, pt1=pt1, ps1=ps1, pb2=pb2, ps2=ps2)
    sp_agent = SPECIFY_AGENT(D=D_, arrival_rate=pb2, policy=LP_policy)
    sp_agent.initialize()

    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2, aloha=aloha, agent=sp_agent)

    UP_reward = []
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation, aloha_queue=env.aloha.queue)
        UP_reward.append(aloha_reward + agent_reward)

    Upper_bound_timely_throughput = np.mean(UP_reward)
    print('Upper_bound_timely_throughput:', Upper_bound_timely_throughput)

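For reference, a hedged example of how upper_bound might be called; the arrival rates, transmission probability, and channel reliabilities below are placeholders, not values from the original experiments.

# Hypothetical invocation -- placeholder values, for illustration only.
upper_bound(D=2, D_=2,
            pb1=0.5, pt1=0.25, ps1=0.9,   # ALOHA node: arrival rate, transmit prob., channel reliability
            pb2=0.5, ps2=0.9,             # agent node: arrival rate, channel reliability
            iteration=int(1e5))
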
def main(N, D, parameter, iteration=int(1e5)):
    # parameters pb2
    agent_list = [HSRA_AGENT(D=D, arrival_rate=parameter[i], learning_rate=0.01, gamma=0.9, length=1)
                  for i in range(N)]
    # parameters ps2
    channels = parameter[N:]

    env = ENVIRONMENT(channels=channels, agent_list=agent_list)

    reward_list = []
    energy_list = []
    for time in tqdm(range(iteration)):
        reward, energy, observations = env.step(time=time)
        for i in range(N):
            env.agent_list[i].update(observation=observations[i], time=time, N=N)
        reward_list.append(reward)
        energy_list.append(energy)

    throughput, power = np.mean(reward_list[-int(1e4):]), np.mean(energy_list[-int(1e4):])
    print('Throu = {}'.format(throughput))
    print('Energy = {}'.format(power))
    return throughput, power

def __init__(self):
    self.envs = {}
    for e in range(0, c.numEnvs):
        # self.ID = e
        self.envs[e] = ENVIRONMENT(e)

def __init__(self, numEnvs=4, eval_time=400):
    self.envs = {}
    self.numEnvs = numEnvs
    self.eval_time = eval_time
    for i in range(self.numEnvs):
        self.envs[i] = ENVIRONMENT(i, eval_time=self.eval_time)

def HSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e6)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    HSRA_agent = HSRA_AGENT(D=D_, arrival_rate=pb2, learning_rate=0.01, gamma=0.9, length=1)
    HSRA_agent.initailize()

    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2, aloha=aloha, agent=HSRA_agent)

    HSRA_reward = []
    # begin = time.time()
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        HSRA_reward.append(aloha_reward + agent_reward)

    HSRA_timely_throughput = np.mean(HSRA_reward[-int(1e5):])
    print('HSRA_timely_throughput:', HSRA_timely_throughput)

def main():
    parser = argparse.ArgumentParser(description='PyTorch actor-critic example')
    parser.add_argument('--hidden_layer_size', type=int, default=128, metavar='N',
                        help='Hidden Layer Size (default: 128)')
    args = parser.parse_args()

    for i in range(1):
        naf_environment = ENVIRONMENT(args, i)
        naf_environment.run()

def DLMA_FNN(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    DLMA = DQN_AGENT(D=D_,
                     arrival_rate=pb2,
                     state_size=160,
                     n_actions=2,
                     n_nodes=2,
                     memory_size=1000,
                     replace_target_iter=20,
                     batch_size=64,
                     learning_rate=0.01,
                     gamma=0.9,
                     epsilon=1,
                     epsilon_min=0.005,
                     epsilon_decay=0.995,
                     alpha=0)
    DLMA.initailize()

    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2, aloha=aloha, agent=DLMA)

    state = [0] * DLMA.state_size
    DLMA_FNN_reward = []
    begin = time.time()
    for i in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation)
        env.agent.update(observation, state)
        DLMA_FNN_reward.append(aloha_reward + agent_reward)

        next_state = state[8:] + return_action(env.agent.action) + \
            return_observation(observation) + [agent_reward, aloha_reward]
        env.agent.store_transition(state, env.agent.action, agent_reward,
                                   aloha_reward, next_state)

        if i > 100 and (i % 5 == 0):
            env.agent.learn()  # internally iterates default (prediction) model

        state = next_state

    DLMA_FNN_timely_throughput = np.mean(DLMA_FNN_reward)
    print('DLMA_FNN_timely_throughput:', DLMA_FNN_timely_throughput)

    end = time.time()
    print('Running time of current process: ', (end - begin), 's')
    print('Memory usage of current process: %.4f MB' %
          (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))

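The loop above relies on return_action and return_observation helpers that are not shown. Below is a hypothetical sketch of what they might look like, assuming one-hot encodings of length 2 and 4 so that one time step occupies 8 state entries (consistent with state[8:] above and features=8 in the RNN variant); the actual helpers may encode differently.

# Hypothetical helper sketches -- assumed one-hot encodings, not the original implementation.
def return_action(action):
    # assumed: action 0 = wait, action 1 = transmit
    return [1, 0] if action == 0 else [0, 1]

def return_observation(observation, n_outcomes=4):
    # assumed: `observation` is an integer index over 4 possible channel outcomes;
    # the real environment may instead return a label that needs mapping first
    encoding = [0] * n_outcomes
    encoding[observation] = 1
    return encoding
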
def FSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e7)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    FSRA_agent = FSRA_AGENT(D=D_, arrival_rate=pb2, learning_rate=0.01)
    FSRA_agent.initailize()

    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2, aloha=aloha, agent=FSRA_agent)

    FSRA_reward = []
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        FSRA_reward.append(aloha_reward + agent_reward)

    FSRA_timely_throughput = np.mean(FSRA_reward[-int(1e5):])
    print('FSRA_timely_throughput:', FSRA_timely_throughput)

def main(n2, D, parameter, iteration=int(1e5)):
    agent_list = [DQN_AGENT(D=D,
                            arrival_rate=parameter[i],
                            state_size=int(8 * M),
                            n_actions=2,
                            n_nodes=2,
                            memory_size=E,
                            replace_target_iter=F,
                            batch_size=B) for i in range(n2)]
    env = ENVIRONMENT(channels=parameter[n2:], agent_list=agent_list)

    reward_list = []
    energy_list = []
    state = [[0] * int(8 * M) for _ in range(n2)]
    next_state = [[0] * int(8 * M) for _ in range(n2)]

    for t in tqdm(range(iteration)):
        for i in range(n2):
            env.agent_list[i].choose_action(np.array(state[i]))

        reward, energy, observations = env.step(time=t)
        reward_list.append(reward)
        energy_list.append(energy)

        for i in range(n2):
            env.agent_list[i].update_queue(observation=observations[i])
            next_state[i], agent_reward, others_reward = return_next_state(
                i, state[i], env.agent_list, observations, reward)
            env.agent_list[i].store_transition(state[i], env.agent_list[i].action,
                                               agent_reward, others_reward, next_state[i])

        if t > 100 and t % 5 == 0:
            for i in range(n2):
                env.agent_list[i].learn()

        state = copy.deepcopy(next_state)

    throughput, power = np.mean(reward_list[-int(1e4):]), np.mean(energy_list[-int(1e4):])
    print('Throu = {}'.format(throughput))
    print('Energy = {}'.format(power))
    return throughput, power

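Note that M, E, F, and B are read as module-level globals inside this main, so they must be defined before the call; return_next_state is likewise assumed to be defined elsewhere in the module. A hedged driver sketch with placeholder hyperparameters:

# Hypothetical driver -- placeholder hyperparameters, for illustration only.
if __name__ == "__main__":
    M, E, F, B = 20, 1000, 20, 64   # state length, memory size, target-update freq., batch size
    n2 = 2
    parameter = [0.4, 0.6,          # arrival rates for the n2 DQN agents
                 0.8, 0.9]          # channel success probabilities for those agents
    main(n2, D=2, parameter=parameter, iteration=int(1e5))
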
def TSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    TSRA_agent = TSRA_AGENT(D=D_, arrival_rate=pb2, learning_rate=0.01, gamma=0.9, length=1)

    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2, aloha=aloha, agent=TSRA_agent)

    TSRA_reward = []
    begin = time.time()
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        TSRA_reward.append(aloha_reward + agent_reward)

    TSRA_timely_throughput = np.mean(TSRA_reward)
    print('TSRA_timely_throughput:', TSRA_timely_throughput)

    end = time.time()
    print('time: ', (end - begin), 's')
    print('memory: %.4f MB' %
          (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))

print('average wifi reward: {}'.format(np.mean(wifi_reward_list[-2000:])))
print('average total reward:{}'.format(
    np.mean(agent_reward_list[-2000:]) + np.mean(wifi_reward_list[-2000:])))
print('Time elapsed:', time.time() - start)

### save training loss
# dqn_agent.my_plot('len1e5_M20_W2_alpha50_g0.999_MM6_r10_1')


if __name__ == "__main__":
    RATIO = 10        # the packet length of WiFi
    NUM_ACTIONS = 11  # the number of actions 0-10

    env = ENVIRONMENT(features=NUM_ACTIONS + 4,
                      ratio=RATIO,
                      n_actions=NUM_ACTIONS,
                      init_wifi_window_size=2,
                      max_backoff=6,
                      penalty=0.5)
    dqn_agent = DQN(env.features,
                    env.ratio,
                    env.n_actions,
                    env.n_nodes,
                    history_len=20,
                    memory_size=1000,
                    replace_target_iter=20,
                    batch_size=32,
                    learning_rate=0.01,
                    gamma=0.999,
                    epsilon=1,
                    epsilon_min=0.005,

if __name__ == "__main__": n_nodes = 2 # number of nodes n_actions = 2 # number of actions M = 20 # state length E = 1000 # memory size F = 20 # target network update frequency B = 64 # mini-batch size gamma = 0.9 # discount factor alpha = 1 # fairness index max_iter = int(5e4) idx = 1 env = ENVIRONMENT(state_size=int(8 * M), tx_prob=0.2) agent = DQN(env.state_size, n_nodes, n_actions, memory_size=E, replace_target_iter=F, batch_size=B, gamma=gamma, epsilon=1, epsilon_min=0.005, epsilon_decay=0.995, alpha=alpha) main(env.tx_prob, M, E, F, B, gamma, alpha, idx, max_iter)
AUV1 = AUV()

# allocate vehicle to a mission
setattr(AUV1, 'mission', UEXP)

# set vehicle parameters
AUV1.origin = (16, 24, -9.5)  # simple
AUV1.goal = (3, 30, -9.5)     # simple
# AUV1.origin = (3, 17, -9.5)    # simple
# AUV1.goal = (10, 40, -9.5)     # simple
# AUV1.origin = (70, 100, -9.5)  # advanced
# AUV1.goal = (60, 40, -9.5)     # advanced
AUV1.speed = 2.0

# define our visualization output & create it
ENV1 = ENVIRONMENT(UEXP, ReefFunction='reef')
ENV1.UnknownRegions = {
    0.8: [(50, 15), (43, 25), (80, 25), (88, 19), (90, 18)],
    0.4: [(80, 84), (95, 80), (95, 92), (76, 95)],
    0.1: [(11, 8), (40, 0), (40, 17), (11, 11)],
}
ENV1.RiskField = ENV1.GenerateRiskField()

sigma = 1.5
ENV1.RiskField = blurRiskField(ENV1.RiskField, sigma)
ENV1.CurrentField_x, ENV1.CurrentField_y = ENV1.GenerateCurrentField(
    type="whirlpool", max_strength=1)

VIS1 = VISUALIZATION(AUV1, ENV1)
VIS1.ShowReef()
# VIS1.ShowCurrent()
VIS1.ShowRisk()

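blurRiskField is not defined in this excerpt. A hypothetical stand-in is sketched below, assuming it applies a Gaussian blur of width sigma to the 2-D risk grid; the original project may implement the smoothing differently.

# Hypothetical stand-in for blurRiskField -- assumes a simple Gaussian blur.
import numpy as np
from scipy.ndimage import gaussian_filter

def blurRiskField(risk_field, sigma):
    # Smooth the 2-D risk grid with a Gaussian kernel of width `sigma`.
    return gaussian_filter(np.asarray(risk_field, dtype=float), sigma=sigma)
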
def DLMA_RNN(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    DLMA = DQN(D=D_,
               arrival_rate=pb2,
               features=8,
               n_actions=2,
               n_nodes=2,
               state_length=4,
               memory_size=1000,
               replace_target_iter=20,
               batch_size=64,
               learning_rate=0.01,
               gamma=0.9,
               epsilon=1,
               epsilon_min=0.005,
               epsilon_decay=0.995,
               alpha=0)
    DLMA.initailize()

    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2, aloha=aloha, agent=DLMA)

    channel_state = [0] * DLMA.features
    state = np.zeros((4, len(channel_state)))
    DLMA_RNN_reward = []
    begin = time.time()
    for i in tqdm(range(iteration)):
        state = np.vstack([state[1:], channel_state])
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation)
        env.agent.update(observation, state)
        DLMA_RNN_reward.append(aloha_reward + agent_reward)

        # note: both reward slots carry agent_reward here, whereas the FNN
        # variant above stores [agent_reward, aloha_reward]
        next_channel_state = return_action(env.agent.action) + \
            return_observation(observation) + [agent_reward, agent_reward]
        experience = np.concatenate([channel_state,
                                     [env.agent.action, agent_reward, agent_reward],
                                     next_channel_state])
        env.agent.add_experience(experience)

        if i > 100 and (i % 5 == 0):
            env.agent.learn()  # internally iterates default (prediction) model

        channel_state = next_channel_state

    DLMA_RNN_timely_throughput = np.mean(DLMA_RNN_reward)
    print('DLMA_RNN_timely_throughput:', DLMA_RNN_timely_throughput)

    end = time.time()
    print('time: ', (end - begin), 's')
    print('memory: %.4f MB' %
          (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))

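The recurrent state above is a rolling window maintained with np.vstack: each step drops the oldest row and appends the newest channel-state vector. A small self-contained illustration of that pattern with toy vectors (not the real channel states):

# Toy illustration of the rolling-window update used above.
import numpy as np

features = 4        # toy feature width (the snippet above uses DLMA.features == 8)
window = np.zeros((4, features))
for step in range(6):
    newest = np.full(features, step)          # stand-in for the next channel_state
    window = np.vstack([window[1:], newest])  # drop oldest row, append newest
print(window)       # rows now hold the 4 most recent vectors (steps 2..5)
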
def Initialize(self):
    for e in range(0, c.numEnvs):
        self.envs[e] = ENVIRONMENT(e=e)

n_nodes = 2    # number of nodes
n_actions = 2  # number of actions
M = 4          # state length
E = 1000       # memory size
F = 20         # target network update frequency
B = 64         # mini-batch size
gamma = 0.9    # discount factor
alpha = 0      # fairness index
max_iter = int(1e4)
idx = 1

env = ENVIRONMENT(features=8)
agent = DQN(env.features,
            n_nodes,
            n_actions,
            state_length=M,
            memory_size=E,
            replace_target_iter=F,
            batch_size=B,
            gamma=gamma,
            epsilon=1,
            epsilon_min=0.005,
            epsilon_decay=0.995,
            alpha=alpha)

main(M, E, F, B, gamma, alpha, idx, max_iter)

' reward: {}'.format(np.mean(agent_reward_list[-2000:][j])))
# print('average aloha reward: {}'.format(np.mean(aloha_reward_list[-2000:])))
# print('average tdma reward: {}'.format(np.mean(tdma_reward_list[-2000:])))
print('average total reward: {}'.format(
    np.mean(agent_reward_list[-2000:])))
    # + np.mean(aloha_reward_list[-2000:])
    # + np.mean(tdma_reward_list[-2000:])))
# print('tdma prob: %i' % env.tdmaPrb)
# print('aloha prob: %i' % env.alohaPrb)
print('agent prob: %i' % env.agentPrb)
print('Time elapsed:', time.time() - start)


if __name__ == "__main__":
    env = ENVIRONMENT(state_size=NN * 40,
                      attempt_prob=0.2)
    dqn_agent = DQN(env.state_size,
                    env.n_actions,
                    env.n_nodes,
                    memory_size=NN * 500,
                    replace_target_iter=200,
                    batch_size=NN * 32,
                    learning_rate=0.01,
                    gamma=0.9,
                    epsilon=0.1,
                    epsilon_min=0.005,
                    epsilon_decay=0.995)

def main():
    # torch.utils.backcompat.broadcast_warning.enabled = True
    # torch.utils.backcompat.keepdim_warning.enabled = True
    # torch.set_default_tensor_type('torch.DoubleTensor')

    parser = argparse.ArgumentParser(description='PyTorch NAF-pendulum example')
    parser.add_argument('--gamma', type=float, default=0.99, metavar='G',
                        help='discount factor (default: 0.99)')
    parser.add_argument('--tau', type=float, default=0.005, metavar='G',
                        help='soft update parameter (default: 1e-3)')
    parser.add_argument('--batch_size', type=int, default=128, metavar='N',
                        help='Batch size (default: 128)')
    parser.add_argument('--replay_buffer_size', type=int, default=1e6, metavar='N',
                        help='Replay Buffer Size (default: 1e6)')
    parser.add_argument('--hidden_layer_size', type=int, default=128, metavar='N',
                        help='Hidden Layer Size (default: 64)')
    parser.add_argument('--lr', type=float, default=5e-5, metavar='G',
                        help='Learning rate of Actor Network (default: 1e-4)')
    parser.add_argument('--max_episode', type=float, default=1, metavar='N',
                        help='Max Episode (default: 200)')
    parser.add_argument('--noise_scale', type=float, default=0.01, metavar='G',
                        help='initial noise scale (default: 1.0)')
    parser.add_argument('--final_noise_scale', type=float, default=0.01, metavar='G',
                        help='final noise scale (default: 0.001)')
    parser.add_argument('--a_param', type=float, default=0.95, metavar='G',
                        help='a_param (default: 0.95)')
    parser.add_argument('--b_param', type=float, default=25.5, metavar='G',
                        help='b_param (default: 5.0~100.0)')
    args = parser.parse_args()

    for i in range(1):
        SEED = 1
        fixed_seed.fixed_seed_function(SEED)
        naf_environment = ENVIRONMENT(args, i)
        naf_environment.run()

    print("Learning Process Finished")

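When driving an entry point like this from another script or a notebook rather than the command line, argparse defaults can be overridden by passing an explicit argument list to parse_args. A minimal sketch of the mechanism (only two of the flags are rebuilt here; the values are placeholders):

# Override selected defaults without touching sys.argv.
import argparse

parser = argparse.ArgumentParser(description='PyTorch NAF-pendulum example')
parser.add_argument('--gamma', type=float, default=0.99)
parser.add_argument('--b_param', type=float, default=25.5)
args = parser.parse_args(['--gamma', '0.95', '--b_param', '50.0'])
print(args.gamma, args.b_param)  # 0.95 50.0
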
my_aloha.write(str(i) + ' ')

# save model
# dqn_agent.save_model("models/model_len1e5_M20_h6_q0.1_2.h5")

# print the results
print('-----------------------------')
print('average agent reward: {}'.format(np.mean(agent_reward_list[-2000:])))
print('average aloha reward: {}'.format(np.mean(aloha_reward_list[-2000:])))
print('average total reward: {}'.format(
    np.mean(agent_reward_list[-2000:]) + np.mean(aloha_reward_list[-2000:])))
print('Time elapsed:', time.time() - start)


if __name__ == "__main__":
    env = ENVIRONMENT(state_size=80,
                      window_size=7)
    dqn_agent = DQN(env.state_size,
                    env.n_actions,
                    env.n_nodes,
                    memory_size=500,
                    replace_target_iter=200,
                    batch_size=32,
                    learning_rate=0.01,
                    gamma=0.9,
                    epsilon=0.1,
                    epsilon_min=0.005,
                    epsilon_decay=0.995)

def __init__(self):
    self.envs = {i: ENVIRONMENT(i) for i in range(C.numEnvs)}

n_actions = 2  # number of actions
M = 20         # state length
E = 1000       # memory size
F = 20         # target network update frequency
B = 64         # mini-batch size
gamma = 0.9    # discount factor
alpha = 0      # fairness index
max_iter = int(5e4)
idx = 1

env = ENVIRONMENT(state_size=int(8 * M),
                  window_size=4,
                  max_backoff=2)
agent = DQN(env.state_size,
            n_nodes,
            n_actions,
            memory_size=E,
            replace_target_iter=F,
            batch_size=B,
            gamma=gamma,
            epsilon=1,
            epsilon_min=0.005,
            epsilon_decay=0.995,
            alpha=alpha)

n_nodes = 2    # number of nodes
n_actions = 2  # number of actions
M = 4          # state length
E = 1000       # memory size
F = 20         # target network update frequency
B = 64         # mini-batch size
gamma = 0.9    # discount factor
alpha = 1      # fairness index
max_iter = int(5e4)
idx = 1

env = ENVIRONMENT(features=8, tx_prob=0.2)
agent = DQN(env.features,
            n_nodes,
            n_actions,
            state_length=M,
            memory_size=E,
            replace_target_iter=F,
            batch_size=B,
            gamma=gamma,
            epsilon=1,
            epsilon_min=0.005,
            epsilon_decay=0.995,
            alpha=alpha)

main(env.tx_prob, M, E, F, B, gamma, alpha, idx, max_iter)

my_tdma.write(str(i) + ' ')

# save model
# dqn_agent.save_model("models/model_len1e4_M20_h6_t10-3_1.h5")

# print the results
print('-----------------------------')
print('average agent reward: {}'.format(np.mean(agent_reward_list[-2000:])))
print('average tdma reward: {}'.format(np.mean(tdma_reward_list[-2000:])))
print('average total reward: {}'.format(
    np.mean(agent_reward_list[-2000:]) + np.mean(tdma_reward_list[-2000:])))
print('Time elapsed:', time.time() - start)


if __name__ == "__main__":
    env = ENVIRONMENT(state_size=40)
    dqn_agent = DQN(env.state_size,
                    env.n_actions,
                    env.n_nodes,
                    memory_size=500,
                    replace_target_iter=200,
                    batch_size=32,
                    learning_rate=0.01,
                    gamma=0.9,
                    epsilon=0.5,
                    epsilon_min=0.005,
                    epsilon_decay=0.995)

print('average total reward:{}'.format(
    np.mean(agent_reward_list[-2000:]) +
    np.mean(aloha_reward_list[-2000:]) +
    np.mean(tdma_reward_list[-2000:])))
print('Time elapsed:', time.time() - start)

dqn_agent.my_plot('len1e5_M20_g0.999_q0.5_t2-5_alpha50_r10_h20_1')


if __name__ == "__main__":
    RATIO1 = RATIO  # ALOHA packet length
    RATIO2 = RATIO  # TDMA packet length
    NUM_ACTIONS = RATIO1 + 1

    env = ENVIRONMENT(state_size=300,  # 15 * 20
                      aloha_ratio=RATIO1,
                      tdma_ratio=RATIO2,
                      n_actions=NUM_ACTIONS,
                      transmission_prob=0.5,
                      penalty=0.5)
    dqn_agent = DQN(env.state_size,
                    env.aloha_ratio,
                    env.tdma_ratio,
                    env.n_actions,
                    env.n_nodes,
                    memory_size=1000,
                    replace_target_iter=20,
                    batch_size=32,
                    learning_rate=0.01,
                    gamma=0.999,
                    epsilon=1,

# dqn_agent.save_model("models/model_len5e5_M20_h6_q0.1_1.h5") # print the results print('-----------------------------') print('average agent reward: {}'.format(np.mean( agent_reward_list[-2000:]))) print('average aloha reward: {}'.format(np.mean( aloha_reward_list[-2000:]))) print('average total reward: {}'.format( np.mean(agent_reward_list[-2000:]) + np.mean(aloha_reward_list[-2000:]))) print('Time elapsed:', time.time() - start) if __name__ == "__main__": env = ENVIRONMENT( state_size=40, window_size=3, ) dqn_agent = DQN( env.state_size, env.n_actions, env.n_nodes, memory_size=500, replace_target_iter=200, batch_size=32, learning_rate=0.01, gamma=0.9, epsilon=0.1, epsilon_min=0.005, epsilon_decay=0.995, )
# print the results
print('-----------------------------')
print('average agent reward: {}'.format(np.mean(agent_reward_list[-2000:])))
print('average aloha reward: {}'.format(np.mean(aloha_reward_list[-2000:])))
print('average tdma reward: {}'.format(np.mean(tdma_reward_list[-2000:])))
print('average total reward: {}'.format(
    np.mean(agent_reward_list[-2000:]) +
    np.mean(aloha_reward_list[-2000:]) +
    np.mean(tdma_reward_list[-2000:])))
print('Time elapsed:', time.time() - start)


if __name__ == "__main__":
    env = ENVIRONMENT(state_size=40, attempt_prob=0.7)
    dqn_agent = DQN(env.state_size,
                    env.n_actions,
                    env.n_nodes,
                    memory_size=500,
                    replace_target_iter=200,
                    batch_size=32,
                    learning_rate=0.01,
                    gamma=0.9,
                    epsilon=0.1,
                    epsilon_min=0.005,
                    epsilon_decay=0.995)