def main(N1, N2, D, parameter, iteration=int(1e5)):
    agent_list = [ALOHA_AGENT(D=D, arrival_rate=parameter[i], trans_prob=1 / N2)
                  for i in range(N2)]  # parameters pb2
    n1_list = [
        ALOHA_AGENT(D=D, arrival_rate=0.5, trans_prob=1 / (4 * N1))
        for _ in range(N1)
    ]
    agent_list.extend(n1_list)
    channels = list(parameter[N2:])  # parameters ps2
    n1_channels = [0.5 for _ in range(N1)]
    channels.extend(n1_channels)
    env = ENVIRONMENT(channels=channels, agent_list=agent_list)
    reward_list = []
    energy_list = []
    for t in tqdm(range(iteration)):
        reward, energy, observations = env.step(time=t)
        for i in range(N1 + N2):
            env.agent_list[i].update(observation=observations[i])
        reward_list.append(reward)
        energy_list.append(energy)
    # average over the last 1e4 slots to report steady-state performance
    throughput, power = np.mean(reward_list[-int(1e4):]), np.mean(
        energy_list[-int(1e4):])
    print('Throu = {}'.format(throughput))
    print('Energy = {}'.format(power))
    return throughput, power
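# Usage sketch for the multi-node main() above. The layout of `parameter` is
# inferred from the indexing inside main(): the first N2 entries are the
# parameterized agents' arrival rates (pb2) and the remaining N2 entries are
# their channel success probabilities (ps2). The helper and the numeric values
# below are illustrative placeholders, not part of the original code base.
def build_parameter(arrival_rates, channel_probs):
    # hypothetical helper: pack pb2 values followed by ps2 values
    assert len(arrival_rates) == len(channel_probs)
    return list(arrival_rates) + list(channel_probs)
# Example (illustrative values):
# main(N1=2, N2=2, D=2, parameter=build_parameter([0.4, 0.6], [0.9, 0.8]))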
def upper_bound(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()
    # get LP agent policy
    LP_policy = multichainLP(D=D, D_=D_, pb1=pb1, pt1=pt1, ps1=ps1,
                             pb2=pb2, ps2=ps2)
    sp_agent = SPECIFY_AGENT(D=D_, arrival_rate=pb2, policy=LP_policy)
    sp_agent.initialize()
    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2,
                      aloha=aloha, agent=sp_agent)
    UP_reward = []
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation, aloha_queue=env.aloha.queue)
        UP_reward.append(aloha_reward + agent_reward)
    Upper_bound_timely_throughput = np.mean(UP_reward)
    print('Upper_bound_timely_throughput:', Upper_bound_timely_throughput)
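# upper_bound() above evaluates a genie-aided benchmark: multichainLP() solves
# for the optimal policy offline, SPECIFY_AGENT() then follows that fixed
# policy while also observing the ALOHA node's queue, and the mean per-slot
# reward over `iteration` slots is reported as the timely-throughput upper
# bound. Illustrative call (parameter values are placeholders, not settings
# taken from the original experiments):
# upper_bound(D=2, D_=2, pb1=0.3, pt1=0.5, ps1=0.9, pb2=0.5, ps2=0.9)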
def HSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e6)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()
    HSRA_agent = HSRA_AGENT(D=D_, arrival_rate=pb2, learning_rate=0.01,
                            gamma=0.9, length=1)
    HSRA_agent.initailize()
    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2,
                      aloha=aloha, agent=HSRA_agent)
    HSRA_reward = []
    # begin = time.time()
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        HSRA_reward.append(aloha_reward + agent_reward)
    HSRA_timely_throughput = np.mean(HSRA_reward[-int(1e5):])
    print('HSRA_timely_throughput:', HSRA_timely_throughput)
def DLMA_FNN(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()
    DLMA = DQN_AGENT(D=D_, arrival_rate=pb2, state_size=160, n_actions=2,
                     n_nodes=2, memory_size=1000, replace_target_iter=20,
                     batch_size=64, learning_rate=0.01, gamma=0.9, epsilon=1,
                     epsilon_min=0.005, epsilon_decay=0.995, alpha=0)
    DLMA.initailize()
    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2,
                      aloha=aloha, agent=DLMA)
    state = [0] * DLMA.state_size
    DLMA_FNN_reward = []
    begin = time.time()
    for i in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation)
        env.agent.update(observation, state)
        DLMA_FNN_reward.append(aloha_reward + agent_reward)
        # drop the oldest 8-feature slot and append the newest
        # (action, observation, agent reward, ALOHA reward)
        next_state = state[8:] + return_action(env.agent.action) + \
            return_observation(observation) + [agent_reward, aloha_reward]
        env.agent.store_transition(state, env.agent.action, agent_reward,
                                   aloha_reward, next_state)
        if i > 100 and (i % 5 == 0):
            env.agent.learn()  # internally iterates default (prediction) model
        state = next_state
    DLMA_FNN_timely_throughput = np.mean(DLMA_FNN_reward)
    print('DLMA_FNN_timely_throughput:', DLMA_FNN_timely_throughput)
    end = time.time()
    print('time: ', (end - begin), 's')
    print('memory: %.4f MB' %
          (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
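# DLMA_FNN() above (and DLMA_RNN() below) rely on return_action() and
# return_observation(), which are not defined in this file. A minimal sketch,
# assuming one-hot encodings consistent with the 8-entry per-slot channel
# state (2 for the action, 4 for the observation, 2 for the rewards); the
# exact categories and encoding in the original code base may differ.
def return_action(action):
    # one-hot encode the binary transmit decision: 0 -> [1, 0], 1 -> [0, 1]
    return [1, 0] if action == 0 else [0, 1]

def return_observation(observation):
    # hypothetical length-4 encoding of the channel feedback
    mapping = {'IDLE': [1, 0, 0, 0],
               'SUCCESS': [0, 1, 0, 0],
               'COLLISION': [0, 0, 1, 0]}
    return mapping.get(observation, [0, 0, 0, 1])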
def main(n, D, l, d, eta, pb1, pt1, ps1, pb2, ps2, T=int(1e5)):
    begin = time.time()
    agents_list = [ALOHA_AGENT(D, arrival_rate=pb1, trans_prob=pt1),
                   Learn2MAC(D, l, d, eta, T, pb2, ps2)]
    env = Environment(agents_list=agents_list, channels=[ps1, ps2])
    reward_list = []
    energy_list = []
    for t in tqdm(range(T)):
        for agent in agents_list:
            agent.select_action(t)
        reward, energy, patterns = env.step(t)
        reward_list.append(reward)
        energy_list.append(energy)
        for agent in agents_list:
            if isinstance(agent, ALOHA_AGENT):
                agent.update(reward)  # aloha
            else:
                agent.update(t, reward, patterns)  # learn2mac
    end = time.time()
    print('Throu = {}'.format(np.mean(reward_list[-int(1e4):])))
    print('Energy = {}'.format(np.mean(energy_list[-int(1e4):])))
    print('Time = {}s'.format(end - begin))
    print('Memory = {}MB'.format(
        psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
def FSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e7)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()
    FSRA_agent = FSRA_AGENT(D=D_, arrival_rate=pb2, learning_rate=0.01)
    FSRA_agent.initailize()
    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2,
                      aloha=aloha, agent=FSRA_agent)
    FSRA_reward = []
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        FSRA_reward.append(aloha_reward + agent_reward)
    FSRA_timely_throughput = np.mean(FSRA_reward[-int(1e5):])
    print('FSRA_timely_throughput:', FSRA_timely_throughput)
def TSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()
    TSRA_agent = TSRA_AGENT(D=D_, arrival_rate=pb2, learning_rate=0.01,
                            gamma=0.9, length=1)
    TSRA_agent.initailize()
    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2,
                      aloha=aloha, agent=TSRA_agent)
    TSRA_reward = []
    begin = time.time()
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        TSRA_reward.append(aloha_reward + agent_reward)
    TSRA_timely_throughput = np.mean(TSRA_reward)
    print('TSRA_timely_throughput:', TSRA_timely_throughput)
    end = time.time()
    print('time: ', (end - begin), 's')
    print('memory: %.4f MB' %
          (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
def DLMA_RNN(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()
    DLMA = DQN(D=D_, arrival_rate=pb2, features=8, n_actions=2, n_nodes=2,
               state_length=4, memory_size=1000, replace_target_iter=20,
               batch_size=64, learning_rate=0.01, gamma=0.9, epsilon=1,
               epsilon_min=0.005, epsilon_decay=0.995, alpha=0)
    DLMA.initailize()
    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2,
                      aloha=aloha, agent=DLMA)
    channel_state = [0] * DLMA.features
    state = np.zeros((4, len(channel_state)))
    DLMA_RNN_reward = []
    begin = time.time()
    for i in tqdm(range(iteration)):
        # stack the latest per-slot channel state into the RNN input window
        state = np.vstack([state[1:], channel_state])
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation)
        env.agent.update(observation, state)
        DLMA_RNN_reward.append(aloha_reward + agent_reward)
        # store both the agent's and the ALOHA node's rewards, mirroring
        # the FNN version above
        next_channel_state = return_action(env.agent.action) + \
            return_observation(observation) + [agent_reward, aloha_reward]
        experience = np.concatenate([
            channel_state,
            [env.agent.action, agent_reward, aloha_reward],
            next_channel_state
        ])
        env.agent.add_experience(experience)
        if i > 100 and (i % 5 == 0):
            env.agent.learn()  # internally iterates default (prediction) model
        channel_state = next_channel_state
    DLMA_RNN_timely_throughput = np.mean(DLMA_RNN_reward)
    print('DLMA_RNN_timely_throughput:', DLMA_RNN_timely_throughput)
    end = time.time()
    print('time: ', (end - begin), 's')
    print('memory: %.4f MB' %
          (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
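# A minimal driver sketch tying the two-node benchmarks above together. The
# parameter values are illustrative placeholders, not the settings used in
# the original experiments; adjust them to the scenario under study.
if __name__ == '__main__':
    params = dict(D=2, D_=2, pb1=0.3, pt1=0.5, ps1=0.9, pb2=0.5, ps2=0.9)
    upper_bound(**params)   # LP-based genie-aided upper bound
    TSRA(**params)          # tabular RL baseline
    DLMA_FNN(**params)      # DQN with feed-forward network
    DLMA_RNN(**params)      # DQN with recurrent state stacking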