Code Example #1
    def sample(self, batch_size=1, with_details=False):
        # batch_size is the number of architectures sampled per call
        if batch_size < 1:
            raise Exception(f'Wrong batch_size: {batch_size} < 1')

        inputs = torch.zeros([batch_size, self.controller_hid])
        # inputs shape: [batch_size, self.controller_hid], e.g. [1, 100]
        hidden = (torch.zeros([batch_size, self.controller_hid]),
                  torch.zeros([batch_size, self.controller_hid]))
        if self.is_cuda:
            inputs = inputs.cuda()
            hidden = (hidden[0].cuda(), hidden[1].cuda())
        entropies = []
        log_probs = []
        actions = []
        for block_idx, action_name in enumerate(self.action_list):
            decoder_index = self.action_index(action_name)

            logits, hidden = self.forward(
                inputs,  # train: inputs [1, 100]; derive: inputs [100, 100], logits: [100, 53]
                hidden,
                action_name,
                is_embed=(block_idx == 0))

            probs = F.softmax(logits, dim=-1)  # derive: [100, 53]
            log_prob = F.log_softmax(logits, dim=-1)

            entropy = -(log_prob * probs).sum(1, keepdim=False)
            action = probs.multinomial(num_samples=1).data  # derive: [100, 1]
            selected_log_prob = log_prob.gather(
                1, utils.get_variable(action, requires_grad=False))

            entropies.append(entropy)
            log_probs.append(selected_log_prob[:, 0])

            inputs = utils.get_variable(action[:, 0] +
                                        sum(self.num_tokens[:decoder_index]),
                                        self.is_cuda,
                                        requires_grad=False)

            inputs = self.encoder(inputs)

            actions.append(action[:, 0])

        actions = torch.stack(actions).transpose(0, 1)
        dags = self._construct_action(actions)
        # e.g. [['gat', 'max', 'tanh', 1, 128, 'cos', 'sum', 'tanh', 4, 16]]

        if with_details:
            return dags, torch.cat(log_probs), torch.cat(entropies)

        return dags
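
The snippets in this section repeatedly call a `utils.get_variable` helper that is never shown. A minimal sketch of what such a helper might look like, assuming it converts numpy inputs to a tensor, detaches tensors from the autograd graph, optionally moves the result to the GPU, and sets `requires_grad` (the actual GraphNAS utility may differ):

    import numpy as np
    import torch

    def get_variable(inputs, cuda=False, requires_grad=False):
        # Accept either a numpy array or a tensor; train_controller passes numpy advantages.
        if isinstance(inputs, np.ndarray):
            # Cast to float32 to match the controller's parameters (an assumption).
            tensor = torch.from_numpy(inputs).float()
        else:
            # Detach from any existing autograd graph.
            tensor = inputs.detach()
        # Optionally move the tensor to the GPU.
        if cuda:
            tensor = tensor.cuda()
        tensor.requires_grad_(requires_grad)
        return tensor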
Code Example #2
    def sample(self, batch_size=1, with_details=False):

        if batch_size < 1:
            raise Exception(f'Wrong batch_size: {batch_size} < 1')

        inputs = torch.zeros([batch_size, self.controller_hid])
        hidden = (torch.zeros([batch_size, self.controller_hid]),
                  torch.zeros([batch_size, self.controller_hid]))
        if self.is_cuda:
            inputs = inputs.cuda()
            hidden = (hidden[0].cuda(), hidden[1].cuda())
        entropies = []
        log_probs = []
        actions = []
        for block_idx, action_name in enumerate(self.action_list):
            decoder_index = self.action_index(action_name)

            logits, hidden = self.forward(inputs,
                                          hidden,
                                          action_name,
                                          is_embed=(block_idx == 0))

            probs = F.softmax(logits, dim=-1)
            log_prob = F.log_softmax(logits, dim=-1)

            entropy = -(log_prob * probs).sum(1, keepdim=False)
            action = probs.multinomial(num_samples=1).data
            selected_log_prob = log_prob.gather(
                1, utils.get_variable(action, requires_grad=False))

            entropies.append(entropy)
            log_probs.append(selected_log_prob[:, 0])

            inputs = utils.get_variable(action[:, 0] +
                                        sum(self.num_tokens[:decoder_index]),
                                        self.is_cuda,
                                        requires_grad=False)

            inputs = self.encoder(inputs)

            actions.append(action[:, 0])

        actions = torch.stack(actions).transpose(0, 1)
        print('sampled actions:')
        print(actions)
        dags = construct_actions(actions, self.action_list, self.search_space)

        if with_details:
            return dags, torch.cat(log_probs), torch.cat(entropies)

        return dags
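
A small usage sketch of the method above, assuming a `controller` object exposing this interface has already been constructed (the variable name and the printed values are illustrative):

    # Hypothetical usage: sample one architecture together with its training details.
    dags, log_probs, entropies = controller.sample(batch_size=1, with_details=True)
    print(dags)       # e.g. [['gat', 'max', 'tanh', 1, 128, 'cos', 'sum', 'tanh', 4, 16]]
    print(log_probs)  # log-probability of the token chosen at each step
    print(entropies)  # entropy of the controller's output distribution at each step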
Code Example #3
    def train_controller(self):
        """
            Train controller to find better structure.
        """
        print("*" * 35, "training controller", "*" * 35)
        model = self.controller
        model.train()

        baseline = None
        adv_history = []
        entropy_history = []
        reward_history = []

        hidden = self.controller.init_hidden(self.args.batch_size)
        total_loss = 0
        for step in range(self.args.controller_max_step):
            # sample graphnas
            structure_list, log_probs, entropies = self.controller.sample(
                with_details=True)

            # calculate reward
            np_entropies = entropies.data.cpu().numpy()
            results = self.get_reward(structure_list, np_entropies, hidden)
            torch.cuda.empty_cache()

            if results:  # has reward
                rewards, hidden = results
            else:
                continue  # a CUDA error occurred; drop this structure and move on to the next iteration

            # discount
            if 1 > self.args.discount > 0:
                rewards = discount(rewards, self.args.discount)

            reward_history.extend(rewards)
            entropy_history.extend(np_entropies)

            # moving average baseline
            if baseline is None:
                baseline = rewards
            else:
                decay = self.args.ema_baseline_decay
                baseline = decay * baseline + (1 - decay) * rewards

            adv = rewards - baseline
            history.append(adv)
            adv = scale(adv, scale_value=0.5)
            adv_history.extend(adv)

            adv = utils.get_variable(adv, self.cuda, requires_grad=False)
            # policy loss
            loss = -log_probs * adv
            if self.args.entropy_mode == 'regularizer':
                loss -= self.args.entropy_coeff * entropies

            loss = loss.sum()  # or loss.mean()

            # update
            self.controller_optim.zero_grad()
            loss.backward()

            if self.args.controller_grad_clip > 0:
                torch.nn.utils.clip_grad_norm(model.parameters(),
                                              self.args.controller_grad_clip)
            self.controller_optim.step()

            total_loss += utils.to_item(loss.data)

            self.controller_step += 1
            torch.cuda.empty_cache()

        print("*" * 35, "training controller over", "*" * 35)
Code Example #4
    def init_hidden(self, batch_size):
        zeros = torch.zeros(batch_size, self.controller_hid)
        return (utils.get_variable(zeros, self.is_cuda, requires_grad=False),
                utils.get_variable(zeros.clone(),
                                   self.is_cuda,
                                   requires_grad=False))
Code Example #5
    def train_controller(self):
        """
            Train controller to find better structure.
        """
        print("*" * 35, "training controller", "*" * 35)
        model = self.controller
        model.train()

        baseline = None
        adv_history = []
        entropy_history = []
        reward_history = []

        hidden = self.controller.init_hidden(self.args.batch_size)  # a pair of [64, 100] tensors (h0, c0)
        total_loss = 0
        for step in range(self.args.controller_max_step):  # controller_max_step = 100
            start_time = time()
            # sample graphnas
            structure_list, log_probs, entropies = self.controller.sample(
                with_details=True)  # batch_size defaults to 1, so one structure is sampled
            # e.g. structure_list = [['gat', 'max', 'tanh', 1, 128, 'cos', 'sum', 'tanh', 4, 16]]
            # log_probs = tensor([-1.9461, -1.3946, -2.0890, -1.7695, -1.9348, -1.9490, -1.3980, -2.0886, -1.7938, -1.9401], device='cuda:0', grad_fn=<CatBackward>)
            # entropies = tensor([1.9459, 1.3863, 2.0794, 1.7917, 1.9458, 1.9459, 1.3862, 2.0794, 1.7917, 1.9458], device='cuda:0', grad_fn=<CatBackward>)
            # calculate reward
            np_entropies = entropies.data.cpu().numpy()
            results = self.get_reward(structure_list, np_entropies, hidden)
            torch.cuda.empty_cache()

            if results:  # has reward
                rewards, hidden = results
            else:
                continue  # a CUDA error occurred; drop this structure and move on to the next iteration

            # discount
            if 1 > self.args.discount > 0:
                rewards = discount(rewards, self.args.discount)

            reward_history.extend(rewards)
            entropy_history.extend(np_entropies)

            # moving average baseline
            if baseline is None:
                baseline = rewards
            else:
                decay = self.args.ema_baseline_decay
                baseline = decay * baseline + (1 - decay) * rewards

            adv = rewards - baseline
            history.append(adv)
            adv = scale(adv, scale_value=0.5)
            adv_history.extend(adv)

            adv = utils.get_variable(adv, self.cuda, requires_grad=False)
            # policy loss
            loss = -log_probs * adv
            if self.args.entropy_mode == 'regularizer':
                loss -= self.args.entropy_coeff * entropies

            loss = loss.sum()  # or loss.mean()

            # update
            self.controller_optim.zero_grad()
            loss.backward()

            if self.args.controller_grad_clip > 0:
                torch.nn.utils.clip_grad_norm(model.parameters(),
                                              self.args.controller_grad_clip)
            self.controller_optim.step()

            total_loss += utils.to_item(loss.data)

            self.controller_step += 1
            torch.cuda.empty_cache()
            elapsed = (time() - start_time)
            print('[%d/%d] time %.2f ' % (
                step + 1,
                self.args.controller_max_step,
                elapsed,
            ))
        print("*" * 35, "training controller over", "*" * 35)
Code Example #6
    def train_controller(self):
        """
            Train controller to find better structure.
        """
        print("*" * 35, "training controller", "*" * 35)
        model = self.controller
        model.train()

        baseline = None
        adv_history = []
        entropy_history = []
        reward_history = []

        hidden = self.controller.init_hidden(self.args.batch_size)
        total_loss = 0
        for step in range(self.args.controller_max_step):
            # time one controller training iteration
            once_controller_train_start_time = time.time()

            # sample graphnas
            structure_list, log_probs, entropies = self.controller.sample(with_details=True)

            # calculate reward
            np_entropies = entropies.data.cpu().numpy()
            results = self.get_reward(structure_list, np_entropies, hidden)
            torch.cuda.empty_cache()

            if results:  # has reward
                rewards, hidden, acc = results
            else:
                continue

            # discount
            if 1 > self.args.discount > 0:
                rewards = discount(rewards, self.args.discount)

            reward_history.extend(rewards)
            entropy_history.extend(np_entropies)

            # moving average baseline
            if baseline is None:
                baseline = rewards
            else:
                decay = self.args.ema_baseline_decay
                baseline = decay * baseline + (1 - decay) * rewards

            adv = rewards - baseline
            history.append(adv)
            adv = scale(adv, scale_value=0.5)
            adv_history.extend(adv)

            adv = utils.get_variable(adv, self.cuda, requires_grad=False)
            # policy loss
            loss = -log_probs * adv
            if self.args.entropy_mode == 'regularizer':
                loss -= self.args.entropy_coeff * entropies

            loss = loss.sum()  # or loss.mean()

            # update
            self.controller_optim.zero_grad()
            loss.backward()

            if self.args.controller_grad_clip > 0:
                torch.nn.utils.clip_grad_norm(model.parameters(),
                                              self.args.controller_grad_clip)
            self.controller_optim.step()

            total_loss += utils.to_item(loss.data)

            self.controller_step += 1
            torch.cuda.empty_cache()
            once_controller_train_end_time = time.time()
            print("the", step, "epcoh controller train time: ",
                  once_controller_train_end_time-once_controller_train_start_time, "s")

            if step == 0:
                self.RL_train_time.append(once_controller_train_start_time)
                self.RL_train_time.append(once_controller_train_end_time)
                self.RL_train_acc.append(acc)
            else:
                self.RL_train_time.append(once_controller_train_end_time)
                self.RL_train_acc.append(acc)
        print("all RL train time list: ", self.RL_train_time)
        print("all RL train acc list: ", self.RL_train_acc)
        print("*" * 35, "training controller over", "*" * 35)
        experiment_data_save("controler_train.txt", self.RL_train_time, self.RL_train_acc)
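
Code Example #6 additionally calls an `experiment_data_save` helper that is not shown. A plausible sketch, assuming it simply writes the time and accuracy lists to a text file (the real helper may store the data differently):

    def experiment_data_save(filename, time_list, acc_list):
        # Persist the training-time and accuracy traces, one list per line.
        with open(filename, 'w') as f:
            f.write(str(time_list) + '\n')
            f.write(str(acc_list) + '\n')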
Code Example #7
    def sample(self, batch_size=1, with_details=False):

        if batch_size < 1:
            raise Exception(f'Wrong batch_size: {batch_size} < 1')

        # Initialize the LSTM input x0 for this batch size as an all-zero matrix.
        inputs = torch.zeros([batch_size, self.controller_hid])
        # inputs.shape = (1, 100)
        # batch_size = 1, self.controller_hid = 100

        # Initialize the LSTM hidden state (h0, c0) for this batch size as all-zero matrices.
        hidden = (torch.zeros([batch_size, self.controller_hid]),
                  torch.zeros([batch_size, self.controller_hid]))
        # shape(hidden) = ([1,100],[1,100])

        if self.is_cuda:
            inputs = inputs.cuda()
            hidden = (hidden[0].cuda(), hidden[1].cuda())

        entropies = []
        log_probs = []
        actions = []

        for block_idx, action_name in enumerate(self.action_list):

            decoder_index = self.action_index(action_name)

            logits, hidden = self.forward(inputs,
                                          hidden,
                                          action_name,
                                          is_embed=(block_idx == 0))

            probs = F.softmax(logits, dim=-1)

            log_prob = F.log_softmax(logits, dim=-1)

            entropy = -(log_prob * probs).sum(1, keepdim=False)

            # Sample an index from the multinomial distribution over probs; the index selects one
            # operator for the current action. Drawing from the distribution instead of taking the
            # argmax preserves exploration during the reinforcement learning search (see the
            # standalone sketch after this example).
            action = probs.multinomial(num_samples=1).data

            # Use the sampled index to look up the corresponding log_prob (the self-information
            # I(x) of an outcome with probability p(x)).
            selected_log_prob = log_prob.gather(
                1, utils.get_variable(action, requires_grad=False))
            # gather(log_prob, 1, action): index log_prob along dim 1 with the sampled indices;
            # the result has the same shape as action.

            entropies.append(entropy)

            log_probs.append(selected_log_prob[:, 0])

            # Compute the index of the chosen operator inside the shared embedding table.
            inputs = utils.get_variable(action[:, 0] +
                                        sum(self.num_tokens[:decoder_index]),
                                        self.is_cuda,
                                        requires_grad=False)
            # Encode the resolved index through the embedding layer.
            inputs = self.encoder(inputs)
            # Store the selected operator index in the actions list.
            actions.append(action[:, 0])

        actions = torch.stack(actions).transpose(0, 1)

        dags = self._construct_action(actions)

        if with_details:
            return dags, torch.cat(log_probs), torch.cat(entropies)

        return dags
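
The sampling step above (a multinomial draw followed by `gather`) can be illustrated with a standalone sketch; the logits below are made up purely for illustration:

    import torch
    import torch.nn.functional as F

    logits = torch.tensor([[2.0, 0.5, 0.1, -1.0]])   # one batch row, 4 candidate operators
    probs = F.softmax(logits, dim=-1)                # approximately [[0.70, 0.16, 0.11, 0.04]]
    log_prob = F.log_softmax(logits, dim=-1)

    # Draw one index per row from the categorical distribution instead of taking the argmax,
    # which keeps exploration alive during reinforcement learning.
    action = probs.multinomial(num_samples=1)        # shape [1, 1], e.g. tensor([[0]])

    # Pick the log-probability of the sampled index; the result has the same shape as action.
    selected_log_prob = log_prob.gather(1, action)
    print(action, selected_log_prob)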
Code Example #8
    def train_controller(self):
        """
            Train controller to find better structure.
        """
        print("*" * 35, "training controller", "*" * 35)

        model = self.controller

        # Put the PyTorch controller model into training mode.
        # The adv_history, entropy_history and reward_history lists are re-initialized on every call.
        model.train()

        baseline = None
        adv_history = []
        entropy_history = []
        reward_history = []

        # Initialize the (h0, c0) hidden vectors for this batch size with zeros.
        hidden = self.controller.init_hidden(self.args.batch_size)
        # self.args.batch_size = 64

        # Initialize the total loss of the controller LSTM.
        total_loss = 0

        for step in range(self.args.controller_max_step):
            # self.args.controller_max_step = 100
            # The controller is trained for 100 iterations per call; each iteration samples one GNN structure.

            # sample graphnas
            structure_list, log_probs, entropies = self.controller.sample(
                with_details=True)
            # structure_list = ['gat', 'sum', 'relu', 2, 8, 'linear', 'mlp', 'tanh', 2, 4]  ---> the sampled GNN structure
            # log_probs = tensor([-1.9461, -1.3936, -2.0807, -1.7964, -1.9570, -1.9413, -1.3704, -2.0878,
            #         -1.7907, -1.9185], device='cuda:0', grad_fn=<CatBackward>)  ---> self-information I of the operator chosen at each step
            # entropies = tensor([1.9459, 1.3863, 2.0794, 1.7917, 1.9458, 1.9459, 1.3862, 2.0794, 1.7917,
            #         1.9458], device='cuda:0', grad_fn=<CatBackward>)  ---> entropy of the LSTM output distribution at each step

            # calculate reward
            np_entropies = entropies.data.cpu().numpy()

            results = self.get_reward(structure_list, np_entropies, hidden)

            # results = (rewards, hidden); hidden is returned unchanged.

            torch.cuda.empty_cache()

            if results:  # has reward
                rewards, hidden = results
            else:
                continue  # a CUDA error occurred; drop this structure and move on to the next iteration

            # discount
            # Recompute every reward in the list with a discount filter.
            if 1 > self.args.discount > 0:
                # self.args.discount = 1
                rewards = discount(rewards, self.args.discount)
                # For reference, discount() is implemented as:
                """
                def discount(x, amount):
                    return scipy.signal.lfilter([1], [1, -amount], x[::-1], axis=0)[::-1]

                x[::-1] reverses the sequence x
                """

            reward_history.extend(rewards)

            entropy_history.extend(np_entropies)

            # moving average baseline
            if baseline is None:
                baseline = rewards
            else:
                decay = self.args.ema_baseline_decay
                baseline = decay * baseline + (1 - decay) * rewards

            adv = rewards - baseline

            history.append(adv)

            adv = scale(adv, scale_value=0.5)
            """
 def scale(value, last_k=10, scale_value=1):
    '''
    scale value into [-scale_value, scale_value], according last_k history
    '''
    max_reward = np.max(history[-last_k:])
    if max_reward == 0:
        return value
    return scale_value / max_reward * value
            
            """

            adv_history.extend(adv)

            adv = utils.get_variable(adv, self.cuda, requires_grad=False)

            # policy loss
            loss = -log_probs * adv

            if self.args.entropy_mode == 'regularizer':
                loss -= self.args.entropy_coeff * entropies

            loss = loss.sum()  # or loss.mean()

            # update
            self.controller_optim.zero_grad()
            loss.backward()

            if self.args.controller_grad_clip > 0:
                torch.nn.utils.clip_grad_norm(model.parameters(),
                                              self.args.controller_grad_clip)
            self.controller_optim.step()

            total_loss += utils.to_item(loss.data)

            self.controller_step += 1
            torch.cuda.empty_cache()

        print("*" * 35, "training controller over", "*" * 35)