Example #1
    def __init__(self, id, config):
        LearningModel.__init__(self)
        self.id = id
        self.config = config
        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "DDPG_CONFIG", self.config)

        self.actor = None
        self.actor_optim = None

        self.critic = None
        self.critic_optim = None

        self.actor_target = None

        self.critic_target = None

        self.init_model()
        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "actor_target_arch", self.actor_target)
        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "critic_target_arch", self.critic_target)

        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "actor_arch", self.actor)
        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "critic_arch", self.critic)

        self.step_counter = 0

        if USE_CUDA:
            self.move_nn_to_gpu()
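
`init_model()` itself is not part of this excerpt. For context, a minimal, self-contained sketch of the layout it presumably builds (dimensions, layer sizes, and learning rates here are assumptions, not values from the real code):

import copy
import torch.nn as nn
import torch.optim as optim

state_dim, action_dim = 8, 4  # assumed dimensions
actor = nn.Sequential(nn.Linear(state_dim, 64), nn.ReLU(),
                      nn.Linear(64, action_dim), nn.Tanh())
critic = nn.Sequential(nn.Linear(state_dim + action_dim, 64), nn.ReLU(),
                       nn.Linear(64, 1))
# target networks start as exact copies and are only updated softly during training
actor_target = copy.deepcopy(actor)
critic_target = copy.deepcopy(critic)
actor_optim = optim.Adam(actor.parameters(), lr=1e-4)
critic_optim = optim.Adam(critic.parameters(), lr=1e-3)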
Example #2
    def __init__(self, id, config):
        """
        Prioritized experience replay buffer initialization.
        """

        self.id = id
        self.config = config
        self.tree = SumTree(self.config.buffer_size)
        self.buffer_size = self.config.buffer_size
        self.batch_size = self.config.batch_size
        self.alpha = self.config.alpha

        self.experience = namedtuple(
            "Experience",
            field_names=["state", "action", "reward", "next_state", "done"])
        self.seed = random.seed(self.config.seed)
        self.step_lock = Lock()
        self.sample_lock = Lock()
        self.sample_lock.acquire()  # pre-acquired so that save_step must run before the first sample

        self.eps = EPSILON
        self.beta = 0.6

        self.beta_increment_per_sampling = 2. / float(self.config.total_step)

        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "PER_REPLAY_MEMORY_CONFIG", self.config)
Example #3
    def get_action(self, state):
        """
        Get the action from the actor
        :param state: env state in np
        :return: action in np
        """

        # if a scheduler function is provided, it replaces the actor network
        with self.actor_lock:
            if self.scheduler_function is None:
                state = to_tensor(state)
                ret = self.actor.forward(state)
                ret = to_numpy(ret)
            else:
                ret = self.scheduler_function.forward(state)

            noise_factor = math.exp(-self.n_step *
                                    self.config.noise_attenuation)
            if self.action_noise:
                GLOBAL_LOGGER.get_tb_logger().add_scalar(
                    "NOISE_FACTOR", noise_factor, self.n_step)
                ret = self.action_noise.add_noise(ret, noise_factor)
            for a in range(len(ret)):
                GLOBAL_LOGGER.get_tb_logger().add_scalar(
                    "ACTION_" + str(a), ret[a], self.n_step)
        return ret
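
The exploration noise above is scaled by an exponentially decaying factor. A minimal sketch of that attenuation applied to Gaussian noise (the real add_noise comes from OUActionNoise and is not shown here; the sigma value is an assumption):

import math
import numpy as np

def attenuated_noise(action, n_step, noise_attenuation=1e-4, sigma=0.2):
    # noise_factor shrinks from 1.0 toward 0 as training progresses,
    # mirroring math.exp(-n_step * noise_attenuation) in get_action()
    noise_factor = math.exp(-n_step * noise_attenuation)
    return action + noise_factor * np.random.normal(0.0, sigma, size=np.shape(action))

print(attenuated_noise(np.array([0.1, -0.3]), n_step=0))
print(attenuated_noise(np.array([0.1, -0.3]), n_step=50000))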
Example #4
    def step(self, action):
        total_reward = 0.0
        rewards = []
        total_rb = 0.0
        for u in range(self.config.n_ue):
            a_n_rb = self.tmp_state[u].n_rb
            GLOBAL_LOGGER.get_tb_logger().add_scalar('required_n_rb.' + str(u), a_n_rb, self.n_step)
            if action[u] == 1 and self.tmp_state[u].q_length:
                total_rb += a_n_rb
        if total_rb <= self.config.ue_config.channel.total_n_rb:
            total_rb = self.config.ue_config.channel.total_n_rb

        for u in range(self.config.n_ue):
            if action[u] == 1 and self.tmp_state[u].q_length > 0:
                n_rb = round(float(self.tmp_state[u].n_rb) / total_rb * self.config.ue_config.channel.total_n_rb)
                GLOBAL_LOGGER.get_tb_logger().add_scalar('A_NRB_' + str(u), n_rb, self.n_step)
                r = self.ue_list[u].step(UE_RB_ACTION(n_rb))
                total_reward += r
                GLOBAL_LOGGER.get_tb_logger().add_scalar('UE_REWARD_' + str(u), r, self.n_step)
            else:
                self.ue_list[u].step(UE_RB_ACTION(0))
                r = 0
                GLOBAL_LOGGER.get_tb_logger().add_scalar('UE_REWARD_' + str(u), 0, self.n_step)

            rewards.append(r)

        GLOBAL_LOGGER.get_tb_logger().add_scalar('ENV_REWARD', total_reward, self.n_step)
        return np.array(rewards, dtype=float)
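
The allocation rule in this step() is: if the total RB demand of the scheduled UEs fits within the channel, every UE gets exactly what it asked for; otherwise the demands are scaled down proportionally to the channel capacity. A small numeric sketch with assumed numbers:

demands = [6, 3, 0, 4]   # per-UE RB demands of the scheduled UEs (assumed)
total_n_rb = 10          # channel capacity (assumed)

total_rb = sum(demands)
if total_rb <= total_n_rb:
    total_rb = total_n_rb            # no contention: scale factor becomes 1
alloc = [round(float(d) / total_rb * total_n_rb) for d in demands]
print(alloc)                         # [5, 2, 0, 3] since 13 > 10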
Example #5
    def run(self):
        rewards_his = np.zeros(self.config.n_ue)
        total_reward_his = 0
        for e in range(self.config.n_episode):
            self.init_env()
            for t in range(self.config.n_step):
                state = self.get_state()
                action = self.agent.get_action(state)
                action_ = np.copy(action)
                if self.config.action_conversion_f is None:
                    for u in range(self.config.n_ue):
                        if (action_[u] > 1.):
                            action_[u] = 1.
                else:
                    action_ = self.config.action_conversion_f(action_)
                rewards = self.step(action_)

                rewards_his = 0.99 * rewards_his + 0.01 * rewards
                for i in range(self.config.n_ue):
                    GLOBAL_LOGGER.get_tb_logger().add_scalar('UE_REWARD.moving_avg.' + str(i), rewards_his[i],
                                                             self.n_step)

                total_reward = np.sum(rewards)
                total_reward_his = 0.99 * total_reward_his + 0.01 * total_reward

                GLOBAL_LOGGER.get_tb_logger().add_scalar('ENV_REWARD.moving_avg', total_reward_his, self.n_step)

                next_state = self.get_state()
                done = 0
                if t == self.config.n_step - 1:
                    done = 1
                self.agent.save_step(state, action, rewards, next_state, done)
Example #6
    def step(self, action):
        '''
        :param action: 0 or 1 for binary Tx, or 0~1 for the fraction of RBs
        :return: per-UE rewards as a numpy array
        '''
        total_reward = 0.0
        rewards = []
        total_rb_pct = 0.0
        for u in range(self.config.n_ue):
            a_n_rb = action[u] * self.config.ue_config.channel.total_n_rb
            GLOBAL_LOGGER.get_tb_logger().add_scalar('required_n_rb.' + str(u), a_n_rb, self.n_step)
            total_rb_pct += action[u]
        if total_rb_pct < 1.:
            total_rb_pct = 1.
        for u in range(self.config.n_ue):
            if int(action[u] * self.config.ue_config.channel.total_n_rb) > 0 and self.tmp_state[u].q_length > 0:
                n_rb = round(float(action[u]) / total_rb_pct * self.config.ue_config.channel.total_n_rb)
                GLOBAL_LOGGER.get_tb_logger().add_scalar('A_NRB_' + str(u), n_rb, self.n_step)
                r = self.ue_list[u].step(UE_RB_ACTION(n_rb))
                total_reward += r
                GLOBAL_LOGGER.get_tb_logger().add_scalar('UE_REWARD_' + str(u), r, self.n_step)
            else:
                self.ue_list[u].step(UE_RB_ACTION(0))
                r = 0
                GLOBAL_LOGGER.get_tb_logger().add_scalar('UE_REWARD_' + str(u), 0, self.n_step)

            rewards.append(r)

        GLOBAL_LOGGER.get_tb_logger().add_scalar('ENV_REWARD', total_reward, self.n_step)
        return np.array(rewards, dtype=float)
Example #7
    def __init__(self, id, config, agent, learning_model, replay_memory):
        Thread.__init__(self)
        self.id = id
        self.config = config
        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "CONTROLLER_CONFIG", config)

        self.agent = agent
        self.model = learning_model
        self.replay_memory = replay_memory

        self.agent.update_actor(self.model.get_actor())
        if isinstance(self.agent, Thread):
            print("agent as a thread start it")
            self.agent.start()
        self.step = 0
Example #8
    def __init__(self, id, config, replay_memory, scheduler_function=None):
        StatusObject.__init__(self)
        self.id = id
        self.env_id = None
        self.replay_memory = replay_memory
        self.actor = None
        self.actor_lock = Lock()
        self.config = config

        self.scheduler_function = scheduler_function

        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "AGENT_CONFIG", self.config)
        if isinstance(self.config.noise_config, OU_ACTION_NOISE_CONFIG):
            self.action_noise = OUActionNoise(self.id,
                                              self.config.noise_config)
        else:
            self.action_noise = None
Example #9
    def __init__(self, id, config):
        """Initialize a ReplayBuffer object.

        Params
        ======
            id (int): identifier of this replay buffer
            config: configuration providing buffer_size (maximum size of the
                buffer), batch_size (size of each training batch) and seed
        """
        self.id = id
        self.config = config

        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "REPLAY_MEMORY_CONFIG", self.config)

        self.memory = deque(
            maxlen=self.config.buffer_size)  # internal memory (deque)
        self.experience = namedtuple(
            "Experience",
            field_names=["state", "action", "reward", "next_state", "done"])
        self.seed = random.seed(self.config.seed)
        self.step_lock = Lock()
        self.sample_lock = Lock()
        self.sample_lock.acquire()  # pre-acquired so that save_step must run before the first sample
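
The two Locks, with sample_lock acquired up front, implement a small handshake so that sampling cannot happen before anything has been stored. The real save_step/sample are not part of this excerpt, so the sketch below is an assumption about how that handshake is typically completed:

from threading import Lock

class TwoLockBufferSketch:
    def __init__(self):
        self.data = []
        self.step_lock = Lock()
        self.sample_lock = Lock()
        self.sample_lock.acquire()          # block sampling until the first save
        self._released = False

    def save_step(self, item):
        with self.step_lock:
            self.data.append(item)
            if not self._released:
                self._released = True
                self.sample_lock.release()  # first save unblocks sampling

    def sample(self):
        with self.sample_lock:              # waits until save_step has run once
            with self.step_lock:
                return list(self.data)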
Example #10
    def run(self):
        for x in range(self.config.total_step):
            t1 = time.time()
            batch = self.replay_memory.sample(asynchronization=True)
            if batch is not None:
                training_info = self.model.step(batch)
                if training_info is not None:
                    self.replay_memory.training_info(batch, training_info)
                self.agent.update_actor(self.model.get_actor())
                GLOBAL_LOGGER.get_tb_logger().add_scalar(
                    "training_time",
                    time.time() - t1, x)
            else:
                time.sleep(0.01)

            if x % 5000 == 0:
                output_file_path = GLOBAL_LOGGER.get_log_path()
                self.model.save(output_file_path, str(x))
                GLOBAL_LOGGER.reset_event_file()

        output_file_path = GLOBAL_LOGGER.get_log_path()

        self.model.save(output_file_path, 'final')
Example #11
    def run(self):
        rewards_his = np.zeros(self.config.n_ue)
        total_reward_his = 0
        for e in range(self.config.n_episode):
            self.init_env()
            for t in range(self.config.n_step):
                state = self.get_state()
                action = self.agent.get_action(state)
                action_ = np.copy(action)
                phi = self.get_phi()

                if self.config.action_conversion_f is None:
                    action_[action_ > 0.] = 1
                    action_[action_ <= 0.] = 0
                else:
                    action_ = self.config.action_conversion_f(action_)
                rewards = self.step(action_)

                rewards_his = 0.99 * rewards_his + 0.01 * rewards
                for i in range(self.config.n_ue):
                    GLOBAL_LOGGER.get_tb_logger().add_scalar('UE_REWARD.moving_avg.' + str(i), rewards_his[i],
                                                             self.n_step)

                total_reward = np.sum(rewards)
                total_reward_his = 0.99 * total_reward_his + 0.01 * total_reward

                GLOBAL_LOGGER.get_tb_logger().add_scalar('ENV_REWARD.moving_avg', total_reward_his, self.n_step)

                next_state = self.get_state()
                phi_next = self.get_phi()
                shaper = - 1. * (phi - self.gamma * phi_next)
                for i in range(self.config.n_ue):
                    GLOBAL_LOGGER.get_tb_logger().add_scalar('shaper.' + str(i), shaper[i],
                                                             self.n_step)

                done = 0
                if t == self.config.n_step - 1:
                    done = 1
                self.agent.save_step(state, action, rewards + shaper, next_state, done)
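
The shaper added to the rewards above is the standard potential-based shaping term F(s, s') = gamma * phi(s') - phi(s), written here as -1 * (phi - gamma * phi_next). A tiny numeric sketch with assumed gamma and potentials:

import numpy as np

gamma = 0.99
phi = np.array([1.0, 0.5])        # potential of the current state, per UE
phi_next = np.array([0.8, 0.7])   # potential of the next state, per UE
shaper = -1.0 * (phi - gamma * phi_next)
print(shaper)                     # approximately [-0.208  0.193]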
Example #12
    def connection_handler(self, conn):
        print('TBServer got a connection from', conn)
        c = TBClientListener(conn, self.logger)
        c.start()
        self.tb_client_listener_thread_list.append(c)


if __name__ == '__main__':
    from sim_src.tb_logger import GLOBAL_LOGGER

    GLOBAL_LOGGER.set_log_path('/tmp/aaaaa/', 'test_tensor_board_server',
                               'test_tensor_board_server')

    t = scalar()
    t.tti = 1213
    t.name = 'hello'
    ts = Timestamp()
    t.timestamp.seconds = 10
    t.timestamp.nanos = 112310
    print(t.timestamp.ToMicroseconds())
    print(t.name)
    print(t.timestamp)
    print(t.tti)
    print(t.value)

    server = TBServer(server_bind_ip='127.0.1.100',
                      server_bind_port=TENSORBOARD_SERVER_PORT,
                      logger=GLOBAL_LOGGER.get_tb_logger())
    server.start()
Example #13
    def __init__(self, id, config, agent):
        Thread.__init__(self)
        SimEnv.__init__(self, id, config, agent)
        GLOBAL_LOGGER.get_tb_logger().add_text_of_object("ENV_CONFIG", self.config)
Example #14
scalar_list.extend([
    'N_RLCTX_',
    'N_DISCARD_',
    'RLC_REWARD_',
    'N_CH_TX_OK_',
    'UE_REWARD_',
])
GLOBAL_LOGGER.get_tb_logger().set_scalar_filter(scalar_list)
rm_config = PER_PROPORTIONAL_REPLAY_MEMORY_CONFIG(batch_size=drl_c.BATCH_SIZE, buffer_size=drl_c.BUFFER_SIZE,
                                                  seed=drl_c.SEED, alpha=0.7, total_step=env_c.N_EPISODE * env_c.N_STEP)
rm = PERProportional(0, rm_config)

agent = SimAgent(0, env_c.agent_config, rm)

env = SimEnvTxBinary_RewardShaping(0, env_c.sim_env_config, agent)

ddpg = MultiHeadCriticDDPG_NEW_PER(0, drl_c.ddpg_config)
ddpg.threshold = env_c.D_MIN_to_D_MAX_pct

controller = PySimController(0, drl_c.controller_config, agent, ddpg, rm)

env.start()
controller.start()
Example #15
    def step(self, batch):
        self._print("learn")
        states = to_tensor(batch[0])
        actions = to_tensor(batch[1])
        rewards = self._reward(to_tensor(batch[2]), states)
        next_states = to_tensor(batch[3])
        done = to_tensor(batch[4])
        self._print("states", batch[0])
        self._print("actions", batch[1])

        # TD target from the target networks: y = r + gamma * Q'(s', mu'(s'))
        a = self.actor_target.forward(next_states)
        a = self._action_match(a)
        s_a = torch.cat((next_states, a), dim=1)
        q = self.critic_target.forward(s_a)
        y = torch.mul(q, self.config.rl_config.gamma)
        self._print("gamma", self.config.rl_config.gamma)
        self._print("rewards", rewards)
        self._print("q", q)

        y = torch.add(rewards, y).detach()
        self._print("y", y)

        actions = self._action_match(actions)
        s_a = torch.cat((states, actions), dim=1)
        q = self.critic.forward(s_a)
        l_critic = F.smooth_l1_loss(q, y, reduction='none')
        self._print("loss", l_critic)

        l_critic_per_batch = torch.sum(l_critic, dim=1, keepdim=True)
        self._print('l_critic_per_batch', l_critic_per_batch)
        ret_per_e = to_numpy(l_critic)
        ret_per_e = ret_per_e * self._per_w_multiplier(batch)
        self._print('ret_per_e_full', ret_per_e)
        ret_per_e = np.sum(ret_per_e, axis=1, keepdims=True)
        self._print('ret_per_e', ret_per_e)

        if len(batch) > 5:
            weights = to_tensor(batch[5])
            self._print("weights", weights)
            l_critic = torch.mul(l_critic_per_batch, weights)
            self._print("w_l_critic", l_critic)

        l_critic = torch.mean(l_critic)

        # critic update
        self.critic_optim.zero_grad()
        l_critic.backward()
        self.critic_optim.step()

        # actor update: maximize Q(s, actor(s)) by minimizing its negation
        a = self.actor.forward(states)
        s_a = torch.cat((states, a), dim=1)
        l_actor = self.critic.forward(s_a)

        l_actor_per_batch = torch.sum(l_actor, dim=1, keepdim=True)
        if len(batch) > 5:
            weights = to_tensor(batch[5])
            self._print("weights", weights)
            l_actor = torch.mul(l_actor_per_batch, weights)
            self._print("w_l_actor", l_actor)

        l_actor = torch.mean(torch.neg(l_actor))

        self.actor_optim.zero_grad()
        l_actor.backward()
        self.actor_optim.step()

        GLOBAL_LOGGER.get_tb_logger().add_scalar('DDPG.loss_actor',
                                                 to_numpy(l_actor),
                                                 self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('DDPG.loss_critic',
                                                 to_numpy(l_critic),
                                                 self.n_step)

        self.update_nn()
        self.step_counter += 1

        return ret_per_e
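
update_nn() is not shown in this excerpt; in a standard DDPG it performs a Polyak soft update of the two target networks. A minimal sketch (the tau value is an assumption):

import torch

def soft_update(target_net, source_net, tau=0.001):
    # theta_target <- tau * theta + (1 - tau) * theta_target
    with torch.no_grad():
        for t_param, s_param in zip(target_net.parameters(),
                                    source_net.parameters()):
            t_param.mul_(1.0 - tau).add_(tau * s_param)

# typically called once per learning step:
#   soft_update(critic_target, critic); soft_update(actor_target, actor)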
Example #16
    def step(self, action):
        err = 0.
        if action.n_rb > 0:
            err = tx_error_rate_for_n_bytes(action.n_byte, action.n_rb,
                                            db_to_dec(self.get_snr_db()),
                                            self.config.T_f, self.config.rb_bw)

            if action.n_rb >= self.config.total_n_rb and err < 1e-5:
                err = 1e-5
            if err < 1e-5:
                ret = 5.
            else:
                ret = -math.log10(err)
        else:
            ret = 0.

        n_successful_tx = 1
        if p_true(err):
            n_successful_tx = 0

        GLOBAL_LOGGER.get_tb_logger().add_scalar('NRB_' + str(self.id),
                                                 action.n_rb, self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('SNR_' + str(self.id),
                                                 self.get_snr_db(),
                                                 self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('E_' + str(self.id), err,
                                                 self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('DIS_' + str(self.id),
                                                 self.dis, self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('CH_REWARD_' + str(self.id),
                                                 ret, self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('N_CH_TX_OK_' + str(self.id),
                                                 n_successful_tx, self.n_step)
        self.change_position()

        return float(n_successful_tx)
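
The reward used here maps the transmission error rate onto a log scale and caps it at 5 (error rates at or below 1e-5 all score 5, and zero allocated RBs score 0). A tiny sketch of that mapping:

import math

def error_to_reward(err, n_rb):
    # mirrors the mapping above: no RBs -> 0, err < 1e-5 -> 5, else -log10(err)
    if n_rb <= 0:
        return 0.0
    if err < 1e-5:
        return 5.0
    return -math.log10(err)

print(error_to_reward(0.01, n_rb=4))   # 2.0
print(error_to_reward(1e-7, n_rb=4))   # 5.0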
Example #17
    def step(self, action):
        ret = 0.
        n_txed = 0
        if action.tx:
            ret = self.get_hol_reward()
            if self.queue:
                GLOBAL_LOGGER.get_tb_logger().add_scalar(
                    'TX_DELAY_' + str(self.id), self.get_hol(), self.n_step)
                n_txed = 1
            self.pop()
        GLOBAL_LOGGER.get_tb_logger().add_scalar('N_RLCTX_' + str(self.id),
                                                 n_txed, self.n_step)

        n_discard = self.push()
        GLOBAL_LOGGER.get_tb_logger().add_scalar('N_PACKET_' + str(self.id),
                                                 self.n_packet, self.n_step)

        # assume the packet arrived at the end of the last TTI
        self.time_step += 1

        n_discard += self.discard()

        GLOBAL_LOGGER.get_tb_logger().add_scalar('N_DISCARD_' + str(self.id),
                                                 n_discard, self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('RLC_REWARD_' + str(self.id),
                                                 ret, self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('HOL_' + str(self.id),
                                                 self.get_hol(), self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('Qsize_' + str(self.id),
                                                 self.get_n_byte_total(),
                                                 self.n_step)
        return ret