Example 1
File: main.py Project: leideng/TSRA
def HSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e6)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    HSRA_agent = HSRA_AGENT(D=D_,
                            arrival_rate=pb2,
                            learning_rate=0.01,
                            gamma=0.9,
                            length=1)
    HSRA_agent.initailize()

    env = ENVIRONMENT(aloha_channel=ps1,
                      agent_channel=ps2,
                      aloha=aloha,
                      agent=HSRA_agent)
    HSRA_reward = []

    # begin = time.time()
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        HSRA_reward.append(aloha_reward + agent_reward)

    HSRA_timely_throughput = np.mean(HSRA_reward[-int(1e5):])
    print('HSRA_timely_throughput:', HSRA_timely_throughput)
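All of the snippets on this page assume the same preamble. The sketch below lists the imports the code actually uses; the import path for the agent and environment classes is left as a comment because their location inside the leideng/TSRA project is an assumption, not something shown here.

# Assumed shared preamble for the snippets on this page.
# The agent/environment import paths are guesses; adjust them to the
# actual module layout of the leideng/TSRA project.
import os
import time

import numpy as np
import psutil
from tqdm import tqdm

# from agents import (ALOHA_AGENT, HSRA_AGENT, TSRA_AGENT, FSRA_AGENT,
#                     SPECIFY_AGENT, DQN_AGENT, DQN)
# from environment import ENVIRONMENT
# from lp import multichainLP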
Example 2
File: main.py Project: leideng/TSRA
def upper_bound(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):

    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    # get LP agent policy
    LP_policy = multichainLP(D=D,
                             D_=D_,
                             pb1=pb1,
                             pt1=pt1,
                             ps1=ps1,
                             pb2=pb2,
                             ps2=ps2)

    sp_agent = SPECIFY_AGENT(D=D_, arrival_rate=pb2, policy=LP_policy)
    sp_agent.initialize()

    env = ENVIRONMENT(aloha_channel=ps1,
                      agent_channel=ps2,
                      aloha=aloha,
                      agent=sp_agent)

    UP_reward = []
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation, aloha_queue=env.aloha.queue)
        UP_reward.append(aloha_reward + agent_reward)

    Upper_bound_timely_throughput = np.mean(UP_reward)
    print('Upper_bound_timely_throughput:', Upper_bound_timely_throughput)
Example 3
def DLMA_FNN(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    DLMA = DQN_AGENT(D=D_,
                     arrival_rate=pb2,
                     state_size=160,
                     n_actions=2,
                     n_nodes=2,
                     memory_size=1000,
                     replace_target_iter=20,
                     batch_size=64,
                     learning_rate=0.01,
                     gamma=0.9,
                     epsilon=1,
                     epsilon_min=0.005,
                     epsilon_decay=0.995,
                     alpha=0)

    DLMA.initailize()

    env = ENVIRONMENT(aloha_channel=ps1,
                      agent_channel=ps2,
                      aloha=aloha,
                      agent=DLMA)
    state = [0] * DLMA.state_size

    DLMA_FNN_reward = []

    begin = time.time()
    for i in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation)
        env.agent.update(observation, state)
        DLMA_FNN_reward.append(aloha_reward + agent_reward)

        next_state = state[8:] + return_action(
            env.agent.action) + return_observation(observation) + [
                agent_reward, aloha_reward
            ]

        env.agent.store_transition(state, env.agent.action, agent_reward,
                                   aloha_reward, next_state)
        if i > 100 and (i % 5 == 0):
            env.agent.learn()  # internally iterates default (prediction) model
        state = next_state

    DLMA_FNN_timely_throughput = np.mean(DLMA_FNN_reward)
    print('DLMA_FNN_timely_throughput:', DLMA_FNN_timely_throughput)

    end = time.time()
    print('time: ', (end - begin), 's')
    print('memory: %.4f MB' %
          (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
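The two DLMA examples rely on return_action and return_observation helpers that are not reproduced on this page. Below is a hypothetical sketch consistent with the sizes the code implies (a 2-element action encoding plus a 4-element observation encoding plus the two rewards gives 8 features per slot, and state_size = 160 = 20 slots x 8); the project's real encodings may differ.

# Hypothetical one-hot helpers; only the output lengths (2 and 4) are
# inferred from the examples above, the concrete encodings are assumptions.
def return_action(action):
    # assumed: action 0 = wait, 1 = transmit
    return [1, 0] if action == 0 else [0, 1]

def return_observation(observation):
    # assumed: observation indexes one of four channel outcomes
    # (e.g. idle / own success / other's success / collision)
    onehot = [0, 0, 0, 0]
    onehot[int(observation)] = 1
    return onehot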
Example 4
def FSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e7)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    FSRA_agent = FSRA_AGENT(D=D_, arrival_rate=pb2, learning_rate=0.01)
    FSRA_agent.initailize()

    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2, aloha=aloha, agent=FSRA_agent)

    FSRA_reward = []
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        FSRA_reward.append(aloha_reward + agent_reward)

    FSRA_timely_throughput = np.mean(FSRA_reward[-int(1e5):])
    print('FSRA_timely_throughput:', FSRA_timely_throughput)
Example 5
File: main.py Project: leideng/TSRA
def TSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    TSRA_agent = TSRA_AGENT(D=D_, arrival_rate=pb2, learning_rate=0.01, gamma=0.9, length=1)
    TSRA_agent.initailize()

    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2, aloha=aloha, agent=TSRA_agent)
    TSRA_reward = []

    begin = time.time()
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        TSRA_reward.append(aloha_reward + agent_reward)

    TSRA_timely_throughput = np.mean(TSRA_reward)
    print('TSRA_timely_throughput:', TSRA_timely_throughput)

    end = time.time()
    print('time: ', (end - begin), 's')
    print('memory: %.4f MB' % (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
Example 6
def DLMA_RNN(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    DLMA = DQN(D=D_,
               arrival_rate=pb2,
               features=8,
               n_actions=2,
               n_nodes=2,
               state_length=4,
               memory_size=1000,
               replace_target_iter=20,
               batch_size=64,
               learning_rate=0.01,
               gamma=0.9,
               epsilon=1,
               epsilon_min=0.005,
               epsilon_decay=0.995,
               alpha=0)

    DLMA.initailize()

    env = ENVIRONMENT(aloha_channel=ps1,
                      agent_channel=ps2,
                      aloha=aloha,
                      agent=DLMA)

    channel_state = [0] * DLMA.features
    state = np.zeros((4, len(channel_state)))

    DLMA_RNN_reward = []
    begin = time.time()
    for i in tqdm(range(iteration)):
        state = np.vstack([state[1:], channel_state])
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation)
        env.agent.update(observation, state)

        DLMA_RNN_reward.append(aloha_reward + agent_reward)
        next_channel_state = (return_action(env.agent.action) +
                              return_observation(observation) +
                              [agent_reward, aloha_reward])
        experience = np.concatenate([
            channel_state, [env.agent.action, agent_reward, aloha_reward],
            next_channel_state
        ])

        env.agent.add_experience(experience)

        if i > 100 and (i % 5 == 0):
            env.agent.learn()  # internally iterates default (prediction) model
        channel_state = next_channel_state

    DLMA_RNN_timely_throughput = np.mean(DLMA_RNN_reward)
    print('DLMA_RNN_timely_throughput:', DLMA_RNN_timely_throughput)

    end = time.time()
    print('time: ', (end - begin), 's')
    print('memory: %.4f MB' %
          (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
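For completeness, a small driver is sketched below. It is not part of the repository; it simply runs each benchmark above with one shared parameter set so their printed timely throughputs can be compared, and the numeric values are placeholders.

# Illustrative driver, not from leideng/TSRA; parameter values are placeholders.
if __name__ == '__main__':
    params = dict(D=2, D_=2, pb1=0.5, pt1=0.5, ps1=0.8, pb2=0.5, ps2=0.8)
    upper_bound(**params)   # LP-based upper bound
    TSRA(**params)
    HSRA(**params)
    FSRA(**params)
    DLMA_FNN(**params)
    DLMA_RNN(**params)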