Example #1
File: main.py Project: leideng/TSRA
def HSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e6)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    HSRA_agent = HSRA_AGENT(D=D_,
                            arrival_rate=pb2,
                            learning_rate=0.01,
                            gamma=0.9,
                            length=1)
    HSRA_agent.initailize()

    env = ENVIRONMENT(aloha_channel=ps1,
                      agent_channel=ps2,
                      aloha=aloha,
                      agent=HSRA_agent)
    HSRA_reward = []

    # begin = time.time()
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        HSRA_reward.append(aloha_reward + agent_reward)

    HSRA_timely_throughput = np.mean(HSRA_reward[-int(1e5):])
    print('HSRA_timely_throughput:', HSRA_timely_throughput)
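
A minimal way to exercise this function, assuming the project's module-level imports and agent classes are in scope; every parameter value below is an illustrative placeholder, not a setting taken from the project:

# D and D_ are passed through to ALOHA_AGENT and HSRA_AGENT respectively;
# pb1/pt1 are the ALOHA arrival and transmission probabilities, pb2 the agent
# arrival rate, and ps1/ps2 the two channel success probabilities.
HSRA(D=2, D_=2, pb1=0.5, pt1=0.5, ps1=0.9, pb2=0.5, ps2=0.9, iteration=int(1e6))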
Example #2
def main(N1, N2, D, parameter, iteration=int(1e5)):

    agent_list = [ALOHA_AGENT(D=D, arrival_rate=parameter[i], trans_prob=1/N2) \
        for i in range(N2)]  # parameters pb2

    n1_list = [
        ALOHA_AGENT(D=D, arrival_rate=0.5, trans_prob=1 / (4 * N1))
        for _ in range(N1)
    ]

    agent_list.extend(n1_list)

    channels = list(parameter[N2:])  # parameters ps2

    n1_channels = [0.5 for _ in range(N1)]
    channels.extend(n1_channels)
    env = ENVIRONMENT(channels=channels, agent_list=agent_list)

    reward_list = []
    energy_list = []

    for t in tqdm(range(iteration)):
        reward, energy, observations = env.step(time=t)

        for i in range(N1 + N2):
            env.agent_list[i].update(observation=observations[i])

        reward_list.append(reward)
        energy_list.append(energy)

    throughput, power = np.mean(reward_list[-int(1e4):]), np.mean(
        energy_list[-int(1e4):])
    print('Throu = {}'.format(throughput))
    print('Energy = {}'.format(power))
    return throughput, power
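
The parameter sequence packs the N2 arrival rates first and the corresponding channel success probabilities after index N2, as the slicing above shows. A sketch of a call with illustrative placeholder values:

# First N2 entries of parameter: arrival rates (pb2) for the N2 ALOHA agents;
# remaining N2 entries: their channel success probabilities (ps2).
parameter = [0.4, 0.4, 0.8, 0.8]
main(N1=1, N2=2, D=2, parameter=parameter, iteration=int(1e5))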
Example #3
def main(N, D, parameter, iteration=int(1e5)):

    agent_list = [HSRA_AGENT(D=D, arrival_rate=parameter[i], learning_rate=0.01, gamma=0.9, length=1) \
        for i in range(N)]  # parameters pb2

    channels = parameter[N:]  # parameters ps2
    env = ENVIRONMENT(channels=channels, agent_list=agent_list)

    reward_list = []
    energy_list = []

    for time in tqdm(range(iteration)):
        reward, energy, observations = env.step(time=time)
        for i in range(N):
            env.agent_list[i].update(observation=observations[i],
                                     time=time,
                                     N=N)

        reward_list.append(reward)
        energy_list.append(energy)

    throughput, power = np.mean(reward_list[-int(1e4):]), np.mean(
        energy_list[-int(1e4):])
    print('Throu = {}'.format(throughput))
    print('Energy = {}'.format(power))
    return throughput, power
Example #4
File: main.py Project: leideng/TSRA
def upper_bound(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):

    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    # get LP agent policy
    LP_policy = multichainLP(D=D,
                             D_=D_,
                             pb1=pb1,
                             pt1=pt1,
                             ps1=ps1,
                             pb2=pb2,
                             ps2=ps2)

    sp_agent = SPECIFY_AGENT(D=D_, arrival_rate=pb2, policy=LP_policy)
    sp_agent.initialize()

    env = ENVIRONMENT(aloha_channel=ps1,
                      agent_channel=ps2,
                      aloha=aloha,
                      agent=sp_agent)

    UP_reward = []
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation, aloha_queue=env.aloha.queue)
        UP_reward.append(aloha_reward + agent_reward)

    Upper_bound_timely_throughput = np.mean(UP_reward)
    print('Upper_bound_timely_throughput:', Upper_bound_timely_throughput)
Example #5
def DLMA_FNN(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    DLMA = DQN_AGENT(D=D_,
                     arrival_rate=pb2,
                     state_size=160,
                     n_actions=2,
                     n_nodes=2,
                     memory_size=1000,
                     replace_target_iter=20,
                     batch_size=64,
                     learning_rate=0.01,
                     gamma=0.9,
                     epsilon=1,
                     epsilon_min=0.005,
                     epsilon_decay=0.995,
                     alpha=0)

    DLMA.initailize()

    env = ENVIRONMENT(aloha_channel=ps1,
                      agent_channel=ps2,
                      aloha=aloha,
                      agent=DLMA)
    state = [0] * DLMA.state_size

    DLMA_FNN_reward = []

    begin = time.time()
    for i in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation)
        env.agent.update(observation, state)
        DLMA_FNN_reward.append(aloha_reward + agent_reward)

        # Slide the history window: drop the oldest 8 features and append the
        # new (action, observation, rewards) features for this step.
        next_state = state[8:] + return_action(
            env.agent.action) + return_observation(observation) + [
                agent_reward, aloha_reward
            ]

        env.agent.store_transition(state, env.agent.action, agent_reward,
                                   aloha_reward, next_state)
        if i > 100 and (i % 5 == 0):
            env.agent.learn()  # internally iterates default (prediction) model
        state = next_state

    DLMA_FNN_timely_throughput = np.mean(DLMA_FNN_reward)
    print('DLMA_FNN_timely_throughput:', DLMA_FNN_timely_throughput)

    end = time.time()
    print('running time of current process: ', (end - begin), 's')
    print('memory usage of current process: %.4f MB' %
          (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
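
The helpers return_action and return_observation are used above but not defined in this excerpt. A plausible sketch, assuming a 2-slot one-hot for the binary action and a 4-slot one-hot for the channel observation, which together with the two rewards make up the 8 features appended per step; the project's actual encoding may differ:

def return_action(action):
    # One-hot encode the binary action (assumed 0 = wait, 1 = transmit).
    return [1, 0] if action == 0 else [0, 1]

def return_observation(observation):
    # One-hot encode the channel feedback; assumes observation is an integer
    # index over four outcomes (e.g. idle, agent success, ALOHA success, collision).
    onehot = [0, 0, 0, 0]
    onehot[observation] = 1
    return onehot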
Example #6
def FSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e7)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    FSRA_agent = FSRA_AGENT(D=D_, arrival_rate=pb2, learning_rate=0.01)
    FSRA_agent.initailize()

    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2, aloha=aloha, agent=FSRA_agent)

    FSRA_reward = []
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        FSRA_reward.append(aloha_reward + agent_reward)

    FSRA_timely_throughput = np.mean(FSRA_reward[-int(1e5):])
    print('FSRA_timely_throughput:', FSRA_timely_throughput)
Example #7
def main(n2, D, parameter, iteration=int(1e5)):
    # M (state history length), E (replay memory size), F (target-network
    # replace interval) and B (batch size) are defined outside this excerpt.
    agent_list = [DQN_AGENT(D=D,
                            arrival_rate=parameter[i],
                            state_size=int(8*M),
                            n_actions=2,
                            n_nodes=2,
                            memory_size=E,
                            replace_target_iter=F,
                            batch_size=B,
                            ) for i in range(n2)]

    env = ENVIRONMENT(channels=parameter[n2:], agent_list=agent_list)

    reward_list = []
    energy_list = []

    state = [[0] * int(8*M) for _ in range(n2)]
    next_state = [[0] * int(8*M) for _ in range(n2)]
    for t in tqdm(range(iteration)):
        for i in range(n2):
            env.agent_list[i].choose_action(np.array(state[i]))

        reward, energy, observations = env.step(time=t) 

        reward_list.append(reward)
        energy_list.append(energy)

        for i in range(n2): 
            env.agent_list[i].update_queue(observation=observations[i])
            next_state[i], agent_reward, others_reward = return_next_state(i, state[i], env.agent_list, observations, reward)
            env.agent_list[i].store_transition(state[i], env.agent_list[i].action, agent_reward, others_reward, next_state[i])

        if t > 100 and t % 5 == 0:
            for i in range(n2):
                env.agent_list[i].learn() 

        state = copy.deepcopy(next_state)

    throughput, power = np.mean(reward_list[-int(1e4):]), np.mean(energy_list[-int(1e4):]) 
    print('Throu = {}'.format(throughput))
    print('Energy = {}'.format(power))
    return throughput, power
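
M, E, F and B are referenced but not defined in this excerpt. A sketch of plausible module-level settings, mirroring the fixed values used in the DLMA_FNN example above; the project's actual values may differ:

M = 20    # state history length, giving state_size = int(8 * M) = 160
E = 1000  # replay memory size
F = 20    # target-network replace interval
B = 64    # batch size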
Example #8
def TSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)

    TSRA_agent = TSRA_AGENT(D=D_, arrival_rate=pb2, learning_rate=0.01, gamma=0.9, length=1)

    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2, aloha=aloha, agent=TSRA_agent)
    TSRA_reward = []

    begin = time.time()
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        TSRA_reward.append(aloha_reward + agent_reward)

    TSRA_timely_throughput = np.mean(TSRA_reward)
    print('TSRA_timely_throughput:', TSRA_timely_throughput)

    end = time.time()
    print('time: ', (end - begin), 's')
    print('memory: %.4f MB' % (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
Example #9
def DLMA_RNN(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    DLMA = DQN(D=D_,
               arrival_rate=pb2,
               features=8,
               n_actions=2,
               n_nodes=2,
               state_length=4,
               memory_size=1000,
               replace_target_iter=20,
               batch_size=64,
               learning_rate=0.01,
               gamma=0.9,
               epsilon=1,
               epsilon_min=0.005,
               epsilon_decay=0.995,
               alpha=0)

    DLMA.initailize()

    env = ENVIRONMENT(aloha_channel=ps1,
                      agent_channel=ps2,
                      aloha=aloha,
                      agent=DLMA)

    channel_state = [0] * DLMA.features
    state = np.zeros((4, len(channel_state)))

    DLMA_RNN_reward = []
    begin = time.time()
    for i in tqdm(range(iteration)):
        # Slide the 4-step window: drop the oldest row, append the latest channel_state.
        state = np.vstack([state[1:], channel_state])
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation)
        env.agent.update(observation, state)

        DLMA_RNN_reward.append(aloha_reward + agent_reward)
        next_channel_state = return_action(
            env.agent.action) + return_observation(observation) + [
                agent_reward, aloha_reward
            ]
        experience = np.concatenate([
            channel_state, [env.agent.action, agent_reward, aloha_reward],
            next_channel_state
        ])

        env.agent.add_experience(experience)

        if i > 100 and (i % 5 == 0):
            env.agent.learn()  # internally iterates default (prediction) model
        channel_state = next_channel_state

    DLMA_RNN_timely_throughput = np.mean(DLMA_RNN_reward)
    print('DLMA_RNN_timely_throughput:', DLMA_RNN_timely_throughput)

    end = time.time()
    print('time: ', (end - begin), 's')
    print('memory: %.4f MB' %
          (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))