def evaluate(self, source, dag, name, batch_size=1, max_num=None):
        """Evaluate on the validation set.
        """
        self.shared.eval()
        self.controller.eval()

        if self.image_dataset:
            data = source
        else:
            data = source[:max_num * self.max_length]

        total_loss = 0
        hidden = self.shared.init_training(batch_size)

        pbar = range(0, self.valid_data_size - 1, self.max_length)
        for count, idx in enumerate(pbar):
            inputs, targets = self.get_batch(data, idx, volatile=True)
            output, hidden, _ = self.shared(inputs,
                                            dag,
                                            hidden=hidden,
                                            is_train=False)
            output_flat = output.view(-1, self.dataset.num_classes)
            total_loss += len(inputs) * self.ce(output_flat, targets).data
            hidden = utils.detach(hidden)
            ppl = math.exp(
                utils.to_item(total_loss) / (count + 1) / self.max_length)

        val_loss = utils.to_item(total_loss) / len(data)
        ppl = math.exp(val_loss)

        self.tb.scalar_summary(f'eval/{name}_loss', val_loss, self.epoch)
        self.tb.scalar_summary(f'eval/{name}_ppl', ppl, self.epoch)
        logger.info(f'eval | loss: {val_loss:8.2f} | ppl: {ppl:8.2f}')
Example #2
    def evaluate(self, source, dag, name, batch_size=1, max_num=None):
        """Evaluate on the validation set.

        NOTE(brendan): We should not be using the test set to develop the
        algorithm (basic machine learning good practices).
        """
        self.shared.eval()
        self.controller.eval()

        data = source[:max_num * self.max_length]

        total_loss = 0
        hidden = self.shared.init_hidden(batch_size)

        pbar = range(0, data.size(0) - 1, self.max_length)
        for count, idx in enumerate(pbar):
            inputs, targets = self.get_batch(data, idx, volatile=True)
            output, hidden = self.shared(inputs,
                                         dag,
                                         prev_s=hidden,
                                         is_training=False)
            output_flat = output.view(-1, self.dataset.num_tokens)
            total_loss += len(inputs) * self.ce(output_flat, targets).data
            hidden = hidden.detach_()
            ppl = math.exp(
                utils.to_item(total_loss) / (count + 1) / self.max_length)

        val_loss = utils.to_item(total_loss) / len(data)
        ppl = math.exp(val_loss)

        self.tb.scalar_summary(f'eval/{name}_loss', val_loss, self.epoch)
        self.tb.scalar_summary(f'eval/{name}_ppl', ppl, self.epoch)
        logger.info(f'eval | loss: {val_loss:8.2f} | ppl: {ppl:8.2f}')
    def get_reward(self, dags, entropies, hidden=None, valid_idx=None):

        if not isinstance(entropies, np.ndarray):
            entropies = entropies.data.cpu().numpy()

        if valid_idx is None:
            valid_idx = 0
        self.reset_dataloader_by_name('valid')

        inputs, targets = self.get_batch(self.valid_data,
                                         valid_idx,
                                         self.max_length,
                                         volatile=True)

        _, valid_loss, _, extra_out = self.get_loss(inputs, targets, dags)
        valid_loss = utils.to_item(valid_loss.data)

        valid_acc = utils.to_item(extra_out['acc'])

        if self.args.ppl_square:
            R = self.args.reward_c * valid_acc**2
        else:
            R = self.args.reward_c * valid_acc

        if self.args.entropy_mode == 'reward':
            rewards = R + self.args.entropy_coeff * entropies

        elif self.args.entropy_mode == 'regularizer':
            rewards = R * np.ones_like(entropies)
        else:
            raise NotImplementedError(
                f'Unknown entropy mode: {self.args.entropy_mode}')

        return rewards, None
    def _summarize_shared_train(self, total_loss, raw_total_loss):
        """Logs a set of training steps."""
        cur_loss = utils.to_item(total_loss) / self.args.log_step_shared
        # NOTE(brendan): The raw loss, without adding in the activation
        # regularization terms, should be used to compute ppl.
        cur_raw_loss = utils.to_item(
            raw_total_loss) / self.args.log_step_shared

        logger.info(f'| epoch {self.epoch:3d} '
                    f'| lr {self.args.shared_lr:.2f} '
                    f'| raw loss {cur_raw_loss:.2f} '
                    f'| loss {cur_loss:.2f} ')
    def _summarize_shared_train(self, total_loss, raw_total_loss, acc=0):
        """Logs a set of training steps."""
        cur_loss = utils.to_item(total_loss) / self.args.log_step
        cur_raw_loss = utils.to_item(raw_total_loss) / self.args.log_step

        logger.info(f'| epoch {self.epoch:3d} '
                    f'| lr {self.shared_lr:4.2f} '
                    f'| raw loss {cur_raw_loss:.2f} '
                    f'| loss {cur_loss:.2f} '
                    f'| acc {acc:8.2f}')

        # Tensorboard
        if self.tb is not None:
            self.tb.scalar_summary('shared/loss', cur_loss, self.shared_step)
            self.tb.scalar_summary('shared/accuracy', acc, self.shared_step)
Example #6
    def _summarize_shared_train(self, total_loss, raw_total_loss):
        """Logs a set of training steps."""
        cur_loss = utils.to_item(total_loss) / self.args.log_step
        # NOTE(brendan): The raw loss, without adding in the activation
        # regularization terms, should be used to compute ppl.
        cur_raw_loss = utils.to_item(raw_total_loss) / self.args.log_step
        ppl = math.exp(cur_raw_loss)

        logger.info(
            '| epoch {0:3d} | lr {1:4.2f} | raw loss {2:.2f} | loss {3:.2f} | ppl {4:8.2f}'
            .format(self.epoch, self.shared_lr, cur_raw_loss, cur_loss, ppl))

        # Tensorboard
        if self.tb is not None:
            self.tb.scalar_summary('shared/loss', cur_loss, self.shared_step)
            self.tb.scalar_summary('shared/perplexity', ppl, self.shared_step)
Example #7
    def get_reward(self, dag, entropies, hidden, valid_idx=0):
        """Computes the perplexity of a single sampled model on a minibatch of
        validation data.
        PPL of the model: the N-th root (N = total number of tokens) of the inverse of
        the product of each token's conditional probability (i.e., the probability of
        predicting the (n+1)-th token given the first n tokens).
        """
        if not isinstance(entropies, np.ndarray):
            entropies = entropies.data.cpu().numpy()

        inputs, targets = self.get_batch(self.valid_data,
                                         valid_idx,
                                         self.max_length,
                                         volatile=True)
        valid_loss, hidden, _ = self.get_loss(inputs, targets, hidden,
                                              dag)  #RNN.forward
        valid_loss = utils.to_item(valid_loss.data)

        valid_ppl = math.exp(valid_loss)  # compute the PPL

        # TODO: we don't know reward_c
        if self.args.ppl_square:  # default: False
            # TODO: but we do know reward_c=80 in the previous paper
            R = self.args.reward_c / valid_ppl**2
        else:
            # NAS (Zoph and Le, 2017), page 8, states that c is a constant
            R = self.args.reward_c / valid_ppl

        if self.args.entropy_mode == 'reward':  # entropy_mode default: 'reward'
            rewards = R + self.args.entropy_coeff * entropies  # entropy_coeff default: 1e-4
        elif self.args.entropy_mode == 'regularizer':
            rewards = R * np.ones_like(entropies)
        else:
            raise NotImplementedError('Unknown entropy mode: {0}'.format(
                self.args.entropy_mode))

        return rewards, hidden
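# The docstring above describes perplexity informally; the following standalone
# sketch (toy probabilities, not repository code) shows that the exp(cross-entropy)
# used throughout these snippets equals the geometric-mean inverse probability.
import math

probs = [0.25, 0.10, 0.50, 0.05]  # hypothetical per-token probabilities of the correct token

# N-th root of the inverse product of the probabilities ...
n = len(probs)
prod = 1.0
for p in probs:
    prod *= p
ppl_root = prod ** (-1.0 / n)

# ... equals exp of the mean negative log-likelihood (what evaluate()/get_reward() compute).
ppl_exp = math.exp(sum(-math.log(p) for p in probs) / n)

assert abs(ppl_root - ppl_exp) < 1e-9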
    def evaluate(self, dag, batch_size=1):
        """Evaluate on the validation set.

        NOTE(brendan): We should not be using the test set to develop the
        algorithm (basic machine learning good practices).
        """
        self.shared.eval()
        self.controller.eval()

        #val_loss = 0
        dice_score = 0
        valid_dataloader = brats_dataloader(self.val,
                                            self.args.batch_size,
                                            None,
                                            1,
                                            infinite=False,
                                            return_incomplete=True)
        for batch in valid_dataloader:
            inputs = torch.from_numpy(batch['data']).cuda()
            targets = torch.from_numpy(batch['seg'].astype(int)).cuda()
            targets = get_multi_class_labels(targets,
                                             n_labels=self.args.n_classes)
            dice_score += utils.to_item(self.get_score(inputs, targets, dag))
        #val_loss =val_loss/len(valid_dataloader)
        dice_score = dice_score / len(valid_dataloader)
        """
        self.tb.scalar_summary(f'eval/{name}_loss', val_loss, self.epoch)
        self.tb.scalar_summary(f'eval/{name}_dice_score', dice_score, self.epoch)
        """
        self.logger.info(f'eval | dice_score: {dice_score:8.2f}')
        return dice_score
Example #9
    def get_reward(self, dag, entropies, data_iter):
        """Computes the perplexity of a single sampled model on a minibatch of
        validation data.
        """
        if not isinstance(entropies, np.ndarray):
            entropies = entropies.data.cpu().numpy()
        try:
            inputs, targets = next(data_iter)
        except StopIteration:
            data_iter = iter(self.valid_data)
            inputs, targets = next(data_iter)
        # TODO: how to do validation here
        valid_loss = self.get_loss(inputs, targets, dag)
        # convert valid_loss to numpy ndarray
        valid_loss = utils.to_item(valid_loss.data)

        valid_ppl = math.exp(valid_loss)

        # TODO: we don't know reward_c
        if self.args.ppl_square:
            # TODO: but we do know reward_c=80 in the previous paper; need to read the previous paper
            R = self.args.reward_c / valid_ppl**2
        else:
            R = self.args.reward_c / valid_ppl

        if self.args.entropy_mode == 'reward':
            rewards = R + self.args.entropy_coeff * entropies
        elif self.args.entropy_mode == 'regularizer':
            rewards = R * np.ones_like(entropies)
        else:
            raise NotImplementedError(
                f'Unknown entropy mode: {self.args.entropy_mode}')

        return rewards
    def evaluate(self, source, dag, name, batch_size=1, max_num=None):
        """Evaluate on the entire validation set.
        Reset the data_generator every time.

        """
        self.shared.eval()
        self.controller.eval()

        self.reset_dataloader_by_name('eval')
        data = self.eval_data

        total_loss = 0
        acc = 0

        for count, (inputs, targets) in enumerate(data):
            inputs = inputs.to(self.device)
            targets = targets.to(self.device)
            output, _ = self.shared(inputs, dag, is_train=False)
            output_flat = output.view(-1, self.dataset.num_classes)
            total_loss += len(inputs) * self.ce(output_flat, targets).data
            acc += self.top_k_acc(output_flat, targets)

        val_loss = utils.to_item(total_loss) / len(data)
        acc /= count

        self.tb.scalar_summary(f'eval/{name}_loss', val_loss, self.epoch)
        self.tb.scalar_summary(f'eval/{name}_top_1_acc', acc, self.epoch)
        logger.info(f'eval | loss: {val_loss:8.2f} | top_1_acc: {acc:8.2f}')
Example #11
def _construct_dags(prev_nodes, activations, func_names, num_blocks):
    """Constructs a set of DAGs based on the actions, i.e., previous nodes and
    activation functions, sampled from the controller/policy pi.

    Args:
        prev_nodes: Previous node actions from the policy.
        activations: Activations sampled from the policy.
        func_names: Mapping from activation function names to functions.
        num_blocks: Number of blocks in the target RNN cell.

    Returns:
        A list of DAGs defined by the inputs.

    RNN cell DAGs are represented in the following way:

    1. Each element (node) in a DAG is a list of `Node`s.

    2. The `Node`s in the list dag[i] correspond to the subsequent nodes
       that take the output from node i as their own input.

    3. dag[-1] is the node that takes input from x^{(t)} and h^{(t - 1)}.
       dag[-1] always feeds dag[0].
       dag[-1] acts as if `w_xc`, `w_hc`, `w_xh` and `w_hh` are its
       weights.

    4. dag[N - 1] is the node that produces the hidden state passed to
       the next timestep. dag[N - 1] is also always a leaf node, and therefore
       is always averaged with the other leaf nodes and fed to the output
       decoder.
    """
    dags = []
    for nodes, func_ids in zip(prev_nodes, activations):
        dag = collections.defaultdict(list)

        # add first node
        logger.info(
            f'[Debug] The size of func_names: {len(func_names)}, i:{func_ids[0]}'
        )
        dag[-1] = [Node(0, func_names[func_ids[0]])]
        dag[-2] = [Node(0, func_names[func_ids[0]])]

        # add following nodes
        for jdx, (idx, func_id) in enumerate(zip(nodes, func_ids[1:])):
            dag[utils.to_item(idx)].append(Node(jdx + 1, func_names[func_id]))

        leaf_nodes = set(range(num_blocks)) - dag.keys()

        # merge with avg
        for idx in leaf_nodes:
            dag[idx] = [Node(num_blocks, 'avg')]

        # TODO(brendan): This is actually y^{(t)}. h^{(t)} is node N - 1 in
        # the graph, where N is the number of nodes. I.e., h^{(t)} takes
        # only one other node as its input.
        # last h[t] node
        last_node = Node(num_blocks + 1, 'h[t]')
        dag[num_blocks] = [last_node]
        dags.append(dag)

    return dags
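# A minimal sketch (toy controller sample, not from the repository) of the
# dict-of-lists DAG that _construct_dags builds for num_blocks=2 with
# prev_nodes=[[0]], activations=[[1, 0]] and hypothetical func_names=['tanh', 'relu'];
# Node mirrors utils.Node = collections.namedtuple('Node', ['id', 'name']).
import collections

Node = collections.namedtuple('Node', ['id', 'name'])

toy_dag = {
    -1: [Node(0, 'relu')],  # x^{(t)} feeds node 0 through the sampled activation
    -2: [Node(0, 'relu')],  # h^{(t-1)} feeds node 0 through the same activation
    0: [Node(1, 'tanh')],   # node 0 feeds node 1 (prev_nodes[0][0] == 0)
    1: [Node(2, 'avg')],    # node 1 is a leaf, so it is averaged
    2: [Node(3, 'h[t]')],   # the average becomes the next hidden state h[t]
}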
Example #12
    def get_reward(self, dag, entropies, hidden, valid_idx=0):
        """Computes the perplexity of a single sampled model on a minibatch of
        validation data.
        """
        if not isinstance(entropies, np.ndarray):
            entropies = entropies.data.cpu().numpy()

        inputs, targets = self.get_batch(self.valid_data,
                                         valid_idx,
                                         self.max_length,
                                         volatile=True)
        valid_loss, hidden, _ = self.get_loss(inputs, targets, hidden, dag)
        valid_loss = utils.to_item(valid_loss.data)

        valid_ppl = math.exp(valid_loss)

        # TODO: we don't know reward_c
        if self.args.ppl_square:
            # TODO: but we do know reward_c=80 in the previous paper
            R = self.args.reward_c / valid_ppl**2
        else:
            R = self.args.reward_c / valid_ppl

        if self.args.entropy_mode == 'reward':
            rewards = R + self.args.entropy_coeff * entropies
        elif self.args.entropy_mode == 'regularizer':
            rewards = R * np.ones_like(entropies)
        else:
            raise NotImplementedError(
                f'Unknown entropy mode: {self.args.entropy_mode}')

        return rewards, hidden
Example #13
    def evaluate(self, source, dag, max_num=None):
        self.shared.eval()
        self.controller.eval()

        if max_num is None:
            max_num = source.size(0)
        else:
            max_num *= self.max_length
        data = source[:max_num]

        total_loss = 0
        hidden = None

        pbar = range(0, data.size(0) - 1, self.max_length)
        for count, idx in enumerate(pbar):
            inputs, targets = self.get_batch(data, idx)
            output, hidden, _ = self.shared(inputs,
                                            dag,
                                            hidden=hidden,
                                            is_train=False)
            output_flat = output.view(-1, self.dataset.num_tokens)
            total_loss += len(inputs) * self.ce(output_flat, targets).data

        val_loss = utils.to_item(total_loss) / len(data)
        ppl = math.exp(val_loss)

        return ppl
    def get_reward(self, dag, entropies):
        """Computes the perplexity of a single sampled model on a minibatch of
        validation data.
        """
        if not isinstance(entropies, np.ndarray):
            entropies = entropies.data.cpu().numpy()

        # Declare validation data here

        inputs = self.valid_queue[0]
        target = self.valid_queue[1]

        valid_ppl = self.get_loss(inputs, target, dag, mode='Valid')
        valid_ppl = utils.to_item(valid_ppl.data)

        # TODO: we don't know reward_c
        if self.args.ppl_square:
            # TODO: but we do know reward_c=80 in the previous paper
            R = self.args.reward_c / valid_ppl**2
        else:
            R = self.args.reward_c / valid_ppl

        if self.args.entropy_mode == 'reward':
            rewards = R + self.args.entropy_coeff * entropies
        elif self.args.entropy_mode == 'regularizer':
            rewards = R * np.ones_like(entropies)
        else:
            raise NotImplementedError(
                f'Unknown entropy mode: {self.args.entropy_mode}')

        return rewards
Example #15
    def _summarize_shared_train(self, total_loss, raw_total_loss):
        """Logs a set of training steps."""
        cur_loss = utils.to_item(total_loss) / self.args.log_step
        # NOTE(brendan): The raw loss, without adding in the activation
        # regularization terms, should be used to compute ppl.
        cur_raw_loss = utils.to_item(raw_total_loss) / self.args.log_step
        ppl = math.exp(cur_raw_loss)

        print(f'| epoch {self.epoch:3d} '
              f'| lr {self.shared_lr:4.2f} '
              f'| raw loss {cur_raw_loss:.2f} '
              f'| loss {cur_loss:.2f} '
              f'| ppl {ppl:8.2f}')

        # Tensorboard
        if self.tb is not None:
            self.tb.scalar_summary('shared/loss', cur_loss, self.shared_step)
            self.tb.scalar_summary('shared/perplexity', ppl, self.shared_step)
        def _construct_micro_cnn_dags(prev_nodes, activations, func_names,
                                      num_blocks):
            """Constructs a set of DAGs based on the actions, i.e., previous nodes and
            activation functions, sampled from the controller/policy pi.

            This is tailored for CNNs only, not the aforementioned RNN.

            Args:
                prev_nodes: Previous node actions from the policy.
                activations: Activations sampled from the policy.
                func_names: [normal_func_names, reduce_func_names]
                num_blocks: Number of blocks in the target CNN cell.

            Returns:
                A list of DAGs defined by the inputs.

            CNN cell DAGs are represented in the following way:

            1. The entire DAG is represented as a simple list of two elements:
                [ Normal-Cell, Reduction-Cell ]
            2. Each element is another list containing entries of the form
                [ (node_id1, node_id2, ops), ] * num_blocks
                    where each entry represents node_id1 -- ops --> node_id2

            3. Node 0 represents h(t-1), i.e. the previous layer's input, and
               node 1 represents h(t), i.e. the current input, so the indices
               of the current cell's blocks start from 2.

            """
            dags = []
            for nodes, func_ids in zip(prev_nodes, activations):
                dag = []

                # compute the first node
                # dag.append(MicroNode(0, 2, func_names[func_ids[0]]))
                # dag.append(MicroNode(1, 2, func_names[func_ids[0]]))
                leaf_nodes = set(range(2, num_blocks + 2))

                # add following nodes
                for curr_idx, (prev_idx,
                               func_id) in enumerate(zip(nodes, func_ids)):
                    layer_id = curr_idx // 2 + 2
                    _prev_idx = utils.to_item(prev_idx)
                    if _prev_idx == layer_id:
                        continue
                    assert _prev_idx < layer_id, "Crucial logical error"
                    dag.append(
                        MicroNode(_prev_idx, layer_id, func_names[func_id]))
                    leaf_nodes -= set([_prev_idx])

                # add leaf node connection with concat
                # for idx in leaf_nodes:
                #     dag.append(MicroNode(idx, num_blocks, 'concat'))
                dag.sort()
                dags.append(dag)

            return dags
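# A minimal sketch (toy values, not from the repository) of the edge-list cell that
# _construct_micro_cnn_dags builds for num_blocks=2; MicroNode is assumed here to be
# a namedtuple('MicroNode', ['src', 'dst', 'op']) and the op names are hypothetical.
import collections

MicroNode = collections.namedtuple('MicroNode', ['src', 'dst', 'op'])

toy_cell = [
    MicroNode(0, 2, 'sep_conv_3x3'),  # h(t-1) -- sep_conv_3x3 --> block 2
    MicroNode(1, 2, 'max_pool_3x3'),  # h(t)   -- max_pool_3x3 --> block 2
    MicroNode(0, 3, 'max_pool_3x3'),  # h(t-1) -- max_pool_3x3 --> block 3
    MicroNode(2, 3, 'sep_conv_3x3'),  # block 2 -- sep_conv_3x3 --> block 3
]
# A full DAG is then the pair [normal_cell, reduction_cell], as the docstring above notes.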
    def _summarize_shared_train(self, total_loss, raw_total_loss):
        """Logs a set of training steps."""
        cur_loss = utils.to_item(total_loss) / self.args.log_step
        cur_raw_loss = utils.to_item(raw_total_loss) / self.args.log_step
        try:
            ppl = math.exp(cur_raw_loss)
        except OverflowError as e:
            # math.exp raises OverflowError on overflow; fall back to inf so the
            # logging below still has a value to report.
            print(f"Got error {e}")
            ppl = float('inf')

        logger.info(f'| epoch {self.epoch:3d} '
                    f'| lr {self.shared_lr:4.2f} '
                    f'| raw loss {cur_raw_loss:.2f} '
                    f'| loss {cur_loss:.2f} '
                    f'| ppl {ppl:8.2f}')

        # Tensorboard
        if self.tb is not None:
            self.tb.scalar_summary('shared/loss', cur_loss, self.shared_step)
            self.tb.scalar_summary('shared/perplexity', ppl, self.shared_step)
Example #18
    def evaluate(self, test_iter, dag, name, batch_size=1, max_num=None):
        """Evaluate on the validation set.
        (lianqing) What is the data source?

        NOTE: validation data is used to check the reward, but here the test set is the same as the validation set.
        """
        self.shared.eval()
        self.controller.eval()
        acc = AverageMeter()
        # data = source[:max_num*self.max_length]
        total_loss = 0
        # pbar = range(0, data.size(0) - 1, self.max_length)
        count = 0
        while True:
            try:
                count += 1
                inputs, targets = next(test_iter)
            except StopIteration:
                print("========> finished evaluating one epoch <========")
                break
                test_iter = iter(self.test_data)
                inputs, targets = next(test_iter)
                # inputs = Variable(inputs)
            # check what difference is_train makes for the controller
            inputs = Variable(inputs.cuda())
            targets = Variable(targets.cuda())
            # inputs = inputs.cuda()
            #targets = targets.cuda()
            output = self.shared(inputs, dag, is_train=False)
            # check whether self.loss will work here
            total_loss += len(inputs) * self.ce(output, targets).data
            ppl = math.exp(utils.to_item(total_loss) / (count + 1))
            acc.update(utils.get_accuracy(targets, output))
        val_loss = utils.to_item(total_loss) / count
        ppl = math.exp(val_loss)
        # TODO: this is fixed for the RNN; it still needs to be fixed for the CNN
        #self.tb.scalar_summary(f'eval/{name}_loss', val_loss, self.epoch)
        #self.tb.scalar_summary(f'eval/{name}_ppl', ppl, self.epoch)
        print(
            f'eval | loss: {val_loss:8.2f} | ppl: {ppl:8.2f} | accuracy: {acc.avg:8.2f}'
        )
        def _evaluate_valid(dag):
            hidden_eval = self.shared.init_training(self.args.batch_size)
            inputs_eval, targets_eval = self.get_batch(self.valid_data,
                                                       0,
                                                       self.max_length,
                                                       volatile=True)
            _, valid_loss_eval, _, _, _ = self.get_loss(inputs_eval,
                                                        targets_eval,
                                                        dag,
                                                        hidden=hidden_eval)
            valid_loss_eval = utils.to_item(valid_loss_eval.data)
            valid_ppl_eval = math.exp(valid_loss_eval)
        def _clip_gradient(abs_max_grad, abs_max_hidden_norm):

            h1tohT = extra_out['hiddens']
            new_abs_max_hidden_norm = utils.to_item(
                h1tohT.norm(dim=-1).data.max())
            if new_abs_max_hidden_norm > abs_max_hidden_norm:
                abs_max_hidden_norm = new_abs_max_hidden_norm
                logger.info(f'max hidden {abs_max_hidden_norm}')
            abs_max_grad = _check_abs_max_grad(abs_max_grad, model)
            torch.nn.utils.clip_grad_norm(model.parameters(),
                                          self.args.shared_grad_clip)
            return abs_max_grad, abs_max_hidden_norm
        def _evaluate_valid(dag):
            self.valid_data = iter(self._valid_data)
            inputs_eval, targets_eval = self.get_batch(self.valid_data,
                                                       0,
                                                       self.max_length,
                                                       volatile=True)
            _, valid_loss_eval, _, extra_out = self.get_loss(
                inputs_eval, targets_eval, dag)
            valid_loss_eval = utils.to_item(valid_loss_eval.data)
            valid_acc_eval = extra_out['acc']

            return valid_loss_eval, valid_acc_eval
Example #22
def _check_max_hidden(abs_max_hidden_norm, h1tohT):
    """Debugging function that checks if `h1tohT` contains a new largest hidden
    state (as measured by L2 norm), and returns the (potentially updated)
    largest hidden state L2 norm.
    """
    new_abs_max_hidden_norm = utils.to_item(
        h1tohT.norm(p=2, dim=-1).data.max())
    if new_abs_max_hidden_norm > abs_max_hidden_norm:
        abs_max_hidden_norm = new_abs_max_hidden_norm
        logger.info(f'max hidden {abs_max_hidden_norm}')

    return abs_max_hidden_norm
Example #23
    def evaluate(self, source, dag, name, batch_size=1, max_num=None):
        """Evaluate dag (child model) on the validation set.
           PT: only if validation-set data is passed in as source

           (compare to eval_once in the TensorFlow implementation)

        NOTE(brendan): We should not be using the test set to develop the
        algorithm (basic machine learning good practices).
        """
        self.shared.eval()
        self.controller.eval()

        data = source[:max_num * self.max_length]

        total_loss = 0
        hidden = self.shared.init_hidden(batch_size)

        pbar = range(0, data.size(0) - 1, self.max_length)
        for count, idx in enumerate(pbar):
            inputs, targets = self.get_batch(data, idx, volatile=True)
            output, hidden, _ = self.shared(inputs,
                                            dag,
                                            hidden=hidden,
                                            is_train=False)
            output_flat = output.view(-1, self.dataset.num_tokens)
            total_loss += len(inputs) * self.ce(output_flat, targets).data
            hidden.detach_()
            #PT: Nothing seems to be done with this ppl (?)
            ppl = math.exp(
                utils.to_item(total_loss) / (count + 1) / self.max_length)

        val_loss = utils.to_item(total_loss) / len(data)
        ppl = math.exp(val_loss)

        self.tb.scalar_summary(f'eval/{name}_loss', val_loss, self.epoch)
        self.tb.scalar_summary(f'eval/{name}_ppl', ppl, self.epoch)
        logger.info(f'eval {name} | loss: {val_loss:8.2f} | ppl: {ppl:8.2f}')
        return val_loss, ppl
    def evaluate(self, dag):
        """Evaluate on the validation set.
        NOTE(brendan): We should not be using the test set to develop the
        algorithm (basic machine learning good practices).
        """

        with torch.no_grad():
            total_loss = 0

            inputs = self.test_queue[0]
            targets = self.test_queue[1]
            output = self.shared(inputs, dag[0])
            total_loss = self.criterion_controller(output, targets).data
            test_mae = utils.to_item(total_loss)
            logger.info(f'dag = {dag}')
            logger.info(f'eval | test mae: {test_mae:8.2f}')
Example #25
    def get_reward(self, dag, entropies, hidden, valid_idx=0):
        """Computes the perplexity of a single sampled model on a minibatch of
        validation data.
        """
        if not isinstance(entropies, np.ndarray):
            entropies = entropies.data.cpu().numpy()

        if valid_idx:
            valid_idx = 0
        #self.valid_data.size()=[1152,64]
        #self.max_length=35 ("gradient w is computed using back-propagation through
        # time truncated to 35 time steps" - Section 2.2, 2nd paragraph)
        # inputs.size() = [35,64]
        # targets.size() = 2240 = 35*64
        inputs, targets = self.get_batch(self.valid_data,
                                         valid_idx,
                                         self.max_length,
                                         volatile=True)
        valid_loss, hidden, _ = self.get_loss(inputs, targets, hidden, dag)
        #hidden.size() = [64,1000] -> 64 is minibatch size, 1000??
        #
        valid_loss = utils.to_item(valid_loss.data)
        #torch.onnx.export(self.shared, inputs, "dag.onnx")

        #perplexity
        valid_ppl = math.exp(valid_loss)

        # TODO: we don't know reward_c
        if self.args.ppl_square:
            # TODO: but we do know reward_c=80 in the previous paper
            R = self.args.reward_c / valid_ppl**2
        else:
            R = self.args.reward_c / valid_ppl

        #entropies - python array with 23 values
        if self.args.entropy_mode == 'reward':
            rewards = R + self.args.entropy_coeff * entropies
        elif self.args.entropy_mode == 'regularizer':
            rewards = R * np.ones_like(entropies)
        else:
            raise NotImplementedError(
                f'Unknown entropy mode: {self.args.entropy_mode}')

        return rewards, hidden, valid_ppl
    def get_reward(self, dags, entropies, inputs, targets):
        """Computes the Dice score of a single sampled model on a minibatch of
        validation data.
        """
        if not isinstance(entropies, np.ndarray):
            entropies = entropies.data.cpu().numpy()

        score = self.get_score(inputs, targets, dags)
        #score=1-self.get_loss(inputs,targets,dags)
        print(score.item())
        R = utils.to_item(score.data)

        if self.args.entropy_mode == 'reward':
            rewards = R + self.args.entropy_coeff * entropies.mean()
        elif self.args.entropy_mode == 'regularizer':
            rewards = R * np.ones_like(entropies)
        else:
            raise NotImplementedError(
                f'Unknown entropy mode: {self.args.entropy_mode}')

        return rewards
    def get_reward(self, dags, entropies, hidden, valid_idx=None):
        """
        Computes the reward of one or several sampled models on a minibatch of
        validation data.

        """
        if not isinstance(entropies, np.ndarray):
            entropies = entropies.data.cpu().numpy()

        if valid_idx is None:
            valid_idx = 0

        inputs, targets = self.get_batch(self.valid_data,
                                         valid_idx,
                                         self.max_length,
                                         volatile=True)
        _, valid_loss, _, hidden, _ = self.get_loss(inputs,
                                                    targets,
                                                    dags,
                                                    hidden=hidden)
        valid_loss = utils.to_item(valid_loss.data)

        valid_ppl = math.exp(valid_loss)

        if self.args.ppl_square:
            R = self.args.reward_c / valid_ppl**2
        else:
            R = self.args.reward_c / valid_ppl

        if self.args.entropy_mode == 'reward':
            rewards = R + self.args.entropy_coeff * entropies
        elif self.args.entropy_mode == 'regularizer':
            rewards = R * np.ones_like(entropies)
        else:
            raise NotImplementedError(
                f'Unknown entropy mode: {self.args.entropy_mode}')

        return rewards, hidden
Example #28
def _construct_dags(prev_nodes, activations, func_names, num_blocks):
    dags = []
    for nodes, func_ids in zip(prev_nodes, activations):
        dag = collections.defaultdict(list)

        # add first node
        dag[-1] = [Node(0, func_names[func_ids[0]])]
        dag[-2] = [Node(0, func_names[func_ids[0]])]

        # add following nodes
        for jdx, (idx, func_id) in enumerate(zip(nodes, func_ids[1:])):
            dag[utils.to_item(idx)].append(Node(jdx + 1, func_names[func_id]))

        leaf_nodes = set(range(num_blocks)) - dag.keys()

        # merge with avg
        for idx in leaf_nodes:
            dag[idx] = [Node(num_blocks, 'avg')]

        last_node = Node(num_blocks + 1, 'h[t]')
        dag[num_blocks] = [last_node]
        dags.append(dag)

    return dags
Example #29
def _construct_dags(prev_nodes, activations, func_names, num_blocks):
    """Constructs a set of DAGs based on the actions, i.e., previous nodes and
    activation functions, sampled from the controller/policy pi.

    Args:
        prev_nodes: Previous node actions from the policy.
        activations: Activations sampled from the policy.
        func_names: Mapping from activation function names to functions.
        num_blocks: Number of blocks in the target RNN cell.

    Returns:
        A list of DAGs defined by the inputs.

    RNN cell DAGs are represented in the following way:

    1. Each element (node) in a DAG is a list of `Node`s.

    2. The `Node`s in the list dag[i] correspond to the subsequent nodes
       that take the output from node i as their own input.

    3. dag[-1] is the node that takes input from x^{(t)} and h^{(t - 1)}.
       dag[-1] always feeds dag[0].
       dag[-1] acts as if `w_xc`, `w_hc`, `w_xh` and `w_hh` are its
       weights.

    4. dag[N - 1] is the node that produces the hidden state passed to
       the next timestep. dag[N - 1] is also always a leaf node, and therefore
       is always averaged with the other leaf nodes and fed to the output
       decoder.
    """
    dags = []
    for nodes, func_ids in zip(prev_nodes,
                               activations):  #nodes[1,11]  func_ids[1,12]
        # Since prev_nodes is [[1,11]] and activations is [[1,12]], zipping them yields a single pair, so this loop actually runs only once.
        dag = collections.defaultdict(list)

        # add first node
        dag[-1] = [Node(0, func_names[func_ids[0]])]
        dag[-2] = [Node(0, func_names[func_ids[0]])]

        # add following nodes   Node: utils.py -> Node = collections.namedtuple('Node', ['id', 'name'])
        # dag stores all the information of the graph: the activation functions and each node's predecessor
        for jdx, (idx, func_id) in enumerate(zip(nodes, func_ids[1:])):
            dag[utils.to_item(idx)].append(Node(
                jdx + 1, func_names[func_id]))  # utils.to_item() returns the numeric value of the tensor
        # leaf nodes: nodes with no successors; using a set keeps the indices unique
        leaf_nodes = set(range(num_blocks)) - dag.keys()

        # merge with avg: all leaf nodes are merged into a single avg node
        for idx in leaf_nodes:
            dag[idx] = [Node(num_blocks, 'avg')]

        # TODO(brendan): This is actually y^{(t)}. h^{(t)} is node N - 1 in
        # the graph, where N is the number of nodes. I.e., h^{(t)} takes
        # only one other node as its input.
        # last h[t] node
        last_node = Node(num_blocks + 1, 'h[t]')
        dag[num_blocks] = [last_node]
        dags.append(dag)

    return dags
Example #30
    def train_controller(self):
        """Fixes the shared parameters and updates the controller parameters.

        The controller is updated with a score function gradient estimator
        (i.e., REINFORCE), with the reward being c/valid_ppl, where valid_ppl
        is computed on a minibatch of validation data.

        A moving average baseline is used.

        The controller is trained for 2000 steps per epoch (i.e.,
        first (Train Shared) phase -> second (Train Controller) phase).
        """
        model = self.controller
        model.train()
        # TODO(brendan): Why can't we call shared.eval() here? Leads to loss
        # being uniformly zero for the controller.
        # self.shared.eval()

        avg_reward_base = None
        baseline = None
        adv_history = []
        entropy_history = []
        reward_history = []

        hidden = self.shared.init_hidden(self.args.batch_size)
        total_loss = 0
        valid_idx = 0
        for step in range(self.args.controller_max_step):
            # sample models, need M=10?
            loss_avg = []
            for m in range(1):
                dags, log_probs, entropies = self.controller.sample(
                    with_details=True)

                # calculate reward
                np_entropies = entropies.data.cpu().numpy()
                # NOTE(brendan): No gradients should be backpropagated to the
                # shared model during controller training, obviously.
                with _get_no_grad_ctx_mgr():
                    rewards, hidden = self.get_reward(dags, np_entropies,
                                                      hidden, valid_idx)

                #hidden = hidden[-1].detach_() # should we reset immediately? like below
                hidden = self.shared.init_hidden(self.args.batch_size)
                # discount
                # if 1 > self.args.discount > 0:
                #     rewards = discount(rewards, self.args.discount)

                reward_history.extend(rewards)
                entropy_history.extend(np_entropies)

                # moving average baseline
                if baseline is None:
                    baseline = rewards
                else:
                    decay = self.args.ema_baseline_decay
                    baseline = decay * baseline + (1 - decay) * rewards

                adv = rewards - baseline
                adv_history.extend(adv)

                # policy loss
                loss = -log_probs * utils.get_variable(
                    adv, self.cuda, requires_grad=False)
                loss_avg.append(loss)
            # if self.args.entropy_mode == 'regularizer':
            #     loss -= self.args.entropy_coeff * entropies
            loss = torch.stack(loss_avg)
            loss = loss.sum()
            #loss = loss.sum()  # or loss.mean()

            # update
            self.controller_optim.zero_grad()
            loss.backward()

            if self.args.controller_grad_clip > 0:
                torch.nn.utils.clip_grad_norm(model.parameters(),
                                              self.args.controller_grad_clip)
            self.controller_optim.step()

            total_loss += utils.to_item(loss.data)

            if ((step % self.args.log_step) == 0) and (step > 0):
                self._summarize_controller_train(total_loss, adv_history,
                                                 entropy_history,
                                                 reward_history,
                                                 avg_reward_base, dags)

                reward_history, adv_history, entropy_history = [], [], []
                total_loss = 0

            self.controller_step += 1

            prev_valid_idx = valid_idx
            valid_idx = ((valid_idx + self.max_length) %
                         (self.valid_data.size(0) - 1))
            # NOTE(brendan): Whenever we wrap around to the beginning of the
            # validation data, we reset the hidden states.
            if prev_valid_idx > valid_idx:
                hidden = self.shared.init_hidden(self.args.batch_size)
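# A minimal standalone sketch (toy numbers, not repository code) of the moving-average
# baseline and REINFORCE-style advantage that train_controller uses above; the decay
# value mirrors args.ema_baseline_decay but is assumed here.
import numpy as np

ema_baseline_decay = 0.95
baseline = None

for rewards in (np.array([0.8, 0.8, 0.8]), np.array([1.0, 1.0, 1.0])):
    if baseline is None:
        baseline = rewards
    else:
        baseline = ema_baseline_decay * baseline + (1 - ema_baseline_decay) * rewards
    adv = rewards - baseline  # multiplied by -log_probs to form the policy loss
    print(baseline, adv)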