def evaluate(self, source, dag, name, batch_size=1, max_num=None):
    """Evaluate on the validation set."""
    self.shared.eval()
    self.controller.eval()

    if self.image_dataset:
        data = source
    else:
        data = source[:max_num * self.max_length]

    total_loss = 0
    hidden = self.shared.init_training(batch_size)

    pbar = range(0, self.valid_data_size - 1, self.max_length)
    for count, idx in enumerate(pbar):
        inputs, targets = self.get_batch(data, idx, volatile=True)
        output, hidden, _ = self.shared(inputs,
                                        dag,
                                        hidden=hidden,
                                        is_train=False)
        output_flat = output.view(-1, self.dataset.num_classes)
        total_loss += len(inputs) * self.ce(output_flat, targets).data
        hidden = utils.detach(hidden)
        ppl = math.exp(
            utils.to_item(total_loss) / (count + 1) / self.max_length)

    val_loss = utils.to_item(total_loss) / len(data)
    ppl = math.exp(val_loss)

    self.tb.scalar_summary(f'eval/{name}_loss', val_loss, self.epoch)
    self.tb.scalar_summary(f'eval/{name}_ppl', ppl, self.epoch)
    logger.info(f'eval | loss: {val_loss:8.2f} | ppl: {ppl:8.2f}')
def evaluate(self, source, dag, name, batch_size=1, max_num=None):
    """Evaluate on the validation set.

    NOTE(brendan): We should not be using the test set to develop the
    algorithm (basic machine learning good practices).
    """
    self.shared.eval()
    self.controller.eval()

    data = source[:max_num * self.max_length]

    total_loss = 0
    hidden = self.shared.init_hidden(batch_size)

    pbar = range(0, data.size(0) - 1, self.max_length)
    for count, idx in enumerate(pbar):
        inputs, targets = self.get_batch(data, idx, volatile=True)
        output, hidden = self.shared(inputs,
                                     dag,
                                     prev_s=hidden,
                                     is_training=False)
        output_flat = output.view(-1, self.dataset.num_tokens)
        total_loss += len(inputs) * self.ce(output_flat, targets).data
        hidden = hidden.detach_()
        ppl = math.exp(
            utils.to_item(total_loss) / (count + 1) / self.max_length)

    val_loss = utils.to_item(total_loss) / len(data)
    ppl = math.exp(val_loss)

    self.tb.scalar_summary(f'eval/{name}_loss', val_loss, self.epoch)
    self.tb.scalar_summary(f'eval/{name}_ppl', ppl, self.epoch)
    logger.info(f'eval | loss: {val_loss:8.2f} | ppl: {ppl:8.2f}')
def get_reward(self, dags, entropies, hidden=None, valid_idx=None):
    if not isinstance(entropies, np.ndarray):
        entropies = entropies.data.cpu().numpy()

    if valid_idx:
        valid_idx = 0
        self.reset_dataloader_by_name('valid')

    inputs, targets = self.get_batch(self.valid_data,
                                     valid_idx,
                                     self.max_length,
                                     volatile=True)
    _, valid_loss, _, extra_out = self.get_loss(inputs, targets, dags)
    valid_loss = utils.to_item(valid_loss.data)
    valid_acc = utils.to_item(extra_out['acc'])

    if self.args.ppl_square:
        R = self.args.reward_c * valid_acc**2
    else:
        R = self.args.reward_c * valid_acc

    if self.args.entropy_mode == 'reward':
        rewards = R + self.args.entropy_coeff * entropies
    elif self.args.entropy_mode == 'regularizer':
        rewards = R * np.ones_like(entropies)
    else:
        raise NotImplementedError(
            f'Unknown entropy mode: {self.args.entropy_mode}')

    return rewards, None
def _summarize_shared_train(self, total_loss, raw_total_loss):
    """Logs a set of training steps."""
    cur_loss = utils.to_item(total_loss) / self.args.log_step_shared
    # NOTE(brendan): The raw loss, without adding in the activation
    # regularization terms, should be used to compute ppl.
    cur_raw_loss = utils.to_item(raw_total_loss) / self.args.log_step_shared

    logger.info(f'| epoch {self.epoch:3d} '
                f'| lr {self.args.shared_lr:.2f} '
                f'| raw loss {cur_raw_loss:.2f} '
                f'| loss {cur_loss:.2f} ')
def _summarize_shared_train(self, total_loss, raw_total_loss, acc=0):
    """Logs a set of training steps."""
    cur_loss = utils.to_item(total_loss) / self.args.log_step
    cur_raw_loss = utils.to_item(raw_total_loss) / self.args.log_step

    logger.info(f'| epoch {self.epoch:3d} '
                f'| lr {self.shared_lr:4.2f} '
                f'| raw loss {cur_raw_loss:.2f} '
                f'| loss {cur_loss:.2f} '
                f'| acc {acc:8.2f}')

    # Tensorboard
    if self.tb is not None:
        self.tb.scalar_summary('shared/loss', cur_loss, self.shared_step)
        self.tb.scalar_summary('shared/accuracy', acc, self.shared_step)
def _summarize_shared_train(self, total_loss, raw_total_loss):
    """Logs a set of training steps."""
    cur_loss = utils.to_item(total_loss) / self.args.log_step
    # NOTE(brendan): The raw loss, without adding in the activation
    # regularization terms, should be used to compute ppl.
    cur_raw_loss = utils.to_item(raw_total_loss) / self.args.log_step
    ppl = math.exp(cur_raw_loss)

    logger.info('| epoch {0:3d} | lr {1:4.2f} | raw loss {2:.2f} '
                '| loss {3:.2f} | ppl {4:8.2f}'.format(
                    self.epoch, self.shared_lr, cur_raw_loss, cur_loss, ppl))

    # Tensorboard
    if self.tb is not None:
        self.tb.scalar_summary('shared/loss', cur_loss, self.shared_step)
        self.tb.scalar_summary('shared/perplexity', ppl, self.shared_step)
def get_reward(self, dag, entropies, hidden, valid_idx=0):
    """Computes the perplexity of a single sampled model on a minibatch of
    validation data.

    Perplexity is the N-th root (N = total number of words) of the inverse
    of the product of each word's conditional probability, i.e. the
    probability of word n+1 given the first n words.
    """
    if not isinstance(entropies, np.ndarray):
        entropies = entropies.data.cpu().numpy()

    inputs, targets = self.get_batch(self.valid_data,
                                     valid_idx,
                                     self.max_length,
                                     volatile=True)
    valid_loss, hidden, _ = self.get_loss(inputs, targets, hidden, dag)  # RNN.forward
    valid_loss = utils.to_item(valid_loss.data)

    valid_ppl = math.exp(valid_loss)  # compute the PPL

    # TODO: we don't know reward_c
    if self.args.ppl_square:  # default: False
        # TODO: but we do know reward_c=80 in the previous paper
        R = self.args.reward_c / valid_ppl**2
    else:
        # NAS (Zoph and Le, 2017), page 8, states that c is a constant.
        R = self.args.reward_c / valid_ppl

    if self.args.entropy_mode == 'reward':  # entropy_mode default: 'reward'
        rewards = R + self.args.entropy_coeff * entropies  # entropy_coeff default: 1e-4
    elif self.args.entropy_mode == 'regularizer':
        rewards = R * np.ones_like(entropies)
    else:
        raise NotImplementedError('Unknown entropy mode: {0}'.format(
            self.args.entropy_mode))

    return rewards, hidden
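# A minimal, standalone sketch of the reward computation used above, with
# illustrative values only (reward_c=80, entropy_coeff=1e-4 are assumptions
# for the example, not this repository's configuration).
import numpy as np

def reward_sketch(valid_loss, entropies, reward_c=80.0, entropy_coeff=1e-4,
                  ppl_square=False, entropy_mode='reward'):
    """Map a validation loss and per-decision entropies to REINFORCE rewards."""
    valid_ppl = np.exp(valid_loss)  # perplexity from the cross-entropy loss
    R = reward_c / (valid_ppl**2 if ppl_square else valid_ppl)
    if entropy_mode == 'reward':
        return R + entropy_coeff * entropies  # entropy bonus added per decision
    return R * np.ones_like(entropies)        # constant reward per decision

# e.g. a validation loss of 4.6 gives ppl ~ 100, so R ~ 0.8 before the bonus.
print(reward_sketch(4.6, np.zeros(3)))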
def evaluate(self, dag, batch_size=1):
    """Evaluate on the validation set.

    NOTE(brendan): We should not be using the test set to develop the
    algorithm (basic machine learning good practices).
    """
    self.shared.eval()
    self.controller.eval()

    # val_loss = 0
    dice_score = 0
    valid_dataloader = brats_dataloader(self.val,
                                        self.args.batch_size,
                                        None,
                                        1,
                                        infinite=False,
                                        return_incomplete=True)
    for batch in valid_dataloader:
        inputs = torch.from_numpy(batch['data']).cuda()
        targets = torch.from_numpy(batch['seg'].astype(int)).cuda()
        targets = get_multi_class_labels(targets,
                                         n_labels=self.args.n_classes)
        dice_score += utils.to_item(self.get_score(inputs, targets, dag))

    # val_loss = val_loss / len(valid_dataloader)
    dice_score = dice_score / len(valid_dataloader)

    # self.tb.scalar_summary(f'eval/{name}_loss', val_loss, self.epoch)
    # self.tb.scalar_summary(f'eval/{name}_dice_score', dice_score, self.epoch)

    self.logger.info(f'eval | dice_score: {dice_score:8.2f}')
    return dice_score
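# The dice score above comes from self.get_score, which is not shown in this
# section. Purely as an assumed illustration, a common choice for segmentation
# is the soft Dice score over one-hot targets, sketched below; the real
# get_score may be implemented differently.
import torch

def soft_dice_score(probs, targets_onehot, eps=1e-6):
    """Soft Dice averaged over classes: 2*intersection / (|P| + |T|)."""
    dims = (0, 2, 3, 4)  # batch and spatial dims of 5-D [N, C, D, H, W] tensors
    intersection = (probs * targets_onehot).sum(dims)
    denom = probs.sum(dims) + targets_onehot.sum(dims)
    return ((2 * intersection + eps) / (denom + eps)).mean()

# Toy usage with random predictions for a 2-class volume.
probs = torch.softmax(torch.randn(1, 2, 8, 8, 8), dim=1)
targets = torch.nn.functional.one_hot(
    torch.randint(0, 2, (1, 8, 8, 8)), num_classes=2).permute(0, 4, 1, 2, 3).float()
print(soft_dice_score(probs, targets))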
def get_reward(self, dag, entropies, data_iter):
    """Computes the perplexity of a single sampled model on a minibatch of
    validation data.
    """
    if not isinstance(entropies, np.ndarray):
        entropies = entropies.data.cpu().numpy()

    try:
        inputs, targets = next(data_iter)
    except StopIteration:
        data_iter = iter(self.valid_data)
        inputs, targets = next(data_iter)

    # TODO: how should validation be done here?
    valid_loss = self.get_loss(inputs, targets, dag)
    # convert valid_loss to a Python scalar
    valid_loss = utils.to_item(valid_loss.data)

    valid_ppl = math.exp(valid_loss)

    # TODO: we don't know reward_c
    if self.args.ppl_square:
        # TODO: but we do know reward_c=80 in the previous paper; need to
        # read the previous paper.
        R = self.args.reward_c / valid_ppl**2
    else:
        R = self.args.reward_c / valid_ppl

    if self.args.entropy_mode == 'reward':
        rewards = R + self.args.entropy_coeff * entropies
    elif self.args.entropy_mode == 'regularizer':
        rewards = R * np.ones_like(entropies)
    else:
        raise NotImplementedError(
            f'Unknown entropy mode: {self.args.entropy_mode}')

    return rewards
def evaluate(self, source, dag, name, batch_size=1, max_num=None):
    """Evaluate on the entire validation set.

    Resets the data generator every time.
    """
    self.shared.eval()
    self.controller.eval()

    self.reset_dataloader_by_name('eval')
    data = self.eval_data

    total_loss = 0
    acc = 0
    for count, (inputs, targets) in enumerate(data):
        inputs = inputs.to(self.device)
        targets = targets.to(self.device)
        output, _ = self.shared(inputs, dag, is_train=False)
        output_flat = output.view(-1, self.dataset.num_classes)
        total_loss += len(inputs) * self.ce(output_flat, targets).data
        acc += self.top_k_acc(output_flat, targets)

    val_loss = utils.to_item(total_loss) / len(data)
    acc /= (count + 1)  # enumerate starts at 0, so count + 1 batches were seen

    self.tb.scalar_summary(f'eval/{name}_loss', val_loss, self.epoch)
    self.tb.scalar_summary(f'eval/{name}_top_1_acc', acc, self.epoch)
    logger.info(f'eval | loss: {val_loss:8.2f} | top_1_acc: {acc:8.2f}')
def _construct_dags(prev_nodes, activations, func_names, num_blocks):
    """Constructs a set of DAGs based on the actions, i.e., previous nodes and
    activation functions, sampled from the controller/policy pi.

    Args:
        prev_nodes: Previous node actions from the policy.
        activations: Activations sampled from the policy.
        func_names: Mapping from activation function names to functions.
        num_blocks: Number of blocks in the target RNN cell.

    Returns:
        A list of DAGs defined by the inputs.

        RNN cell DAGs are represented in the following way:

        1. Each element (node) in a DAG is a list of `Node`s.

        2. The `Node`s in the list dag[i] correspond to the subsequent nodes
           that take the output from node i as their own input.

        3. dag[-1] is the node that takes input from x^{(t)} and h^{(t - 1)}.
           dag[-1] always feeds dag[0].
           dag[-1] acts as if `w_xc`, `w_hc`, `w_xh` and `w_hh` are its
           weights.

        4. dag[N - 1] is the node that produces the hidden state passed to
           the next timestep. dag[N - 1] is also always a leaf node, and
           therefore is always averaged with the other leaf nodes and fed to
           the output decoder.
    """
    dags = []
    for nodes, func_ids in zip(prev_nodes, activations):
        dag = collections.defaultdict(list)

        # add first node
        logger.info(f'[Debug] The size of func_names: {len(func_names)}, '
                    f'i: {func_ids[0]}')
        dag[-1] = [Node(0, func_names[func_ids[0]])]
        dag[-2] = [Node(0, func_names[func_ids[0]])]

        # add following nodes
        for jdx, (idx, func_id) in enumerate(zip(nodes, func_ids[1:])):
            dag[utils.to_item(idx)].append(Node(jdx + 1, func_names[func_id]))

        leaf_nodes = set(range(num_blocks)) - dag.keys()

        # merge with avg
        for idx in leaf_nodes:
            dag[idx] = [Node(num_blocks, 'avg')]

        # TODO(brendan): This is actually y^{(t)}. h^{(t)} is node N - 1 in
        # the graph, where N is the number of nodes. I.e., h^{(t)} takes
        # only one other node as its input.

        # last h[t] node
        last_node = Node(num_blocks + 1, 'h[t]')
        dag[num_blocks] = [last_node]
        dags.append(dag)

    return dags
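# A minimal, self-contained sketch of the DAG structure that _construct_dags
# produces, assuming Node = namedtuple('Node', ['id', 'name']) as noted in the
# comments, with num_blocks=4 and made-up controller actions chosen purely for
# illustration.
import collections

Node = collections.namedtuple('Node', ['id', 'name'])
func_names = ['tanh', 'ReLU', 'identity', 'sigmoid']

# Controller actions: activation for node 0, then (previous node, activation)
# for nodes 1..3.
activations = [0, 1, 1, 3]   # tanh, ReLU, ReLU, sigmoid
prev_nodes = [0, 0, 1]       # node 1 <- node 0, node 2 <- node 0, node 3 <- node 1

dag = collections.defaultdict(list)
dag[-1] = [Node(0, func_names[activations[0]])]  # edge from x^{(t)}
dag[-2] = [Node(0, func_names[activations[0]])]  # edge from h^{(t-1)}
for jdx, (idx, func_id) in enumerate(zip(prev_nodes, activations[1:])):
    dag[idx].append(Node(jdx + 1, func_names[func_id]))

# Leaf nodes (2 and 3 here) are averaged and fed to the output node h[t].
leaf_nodes = set(range(4)) - dag.keys()
for idx in leaf_nodes:
    dag[idx] = [Node(4, 'avg')]
dag[4] = [Node(5, 'h[t]')]

print(dict(dag))
# {-1: [Node(id=0, name='tanh')], -2: [Node(id=0, name='tanh')],
#  0: [Node(id=1, name='ReLU'), Node(id=2, name='ReLU')],
#  1: [Node(id=3, name='sigmoid')], 2: [Node(id=4, name='avg')],
#  3: [Node(id=4, name='avg')], 4: [Node(id=5, name='h[t]')]}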
def get_reward(self, dag, entropies, hidden, valid_idx=0):
    """Computes the perplexity of a single sampled model on a minibatch of
    validation data.
    """
    if not isinstance(entropies, np.ndarray):
        entropies = entropies.data.cpu().numpy()

    inputs, targets = self.get_batch(self.valid_data,
                                     valid_idx,
                                     self.max_length,
                                     volatile=True)
    valid_loss, hidden, _ = self.get_loss(inputs, targets, hidden, dag)
    valid_loss = utils.to_item(valid_loss.data)

    valid_ppl = math.exp(valid_loss)

    # TODO: we don't know reward_c
    if self.args.ppl_square:
        # TODO: but we do know reward_c=80 in the previous paper
        R = self.args.reward_c / valid_ppl**2
    else:
        R = self.args.reward_c / valid_ppl

    if self.args.entropy_mode == 'reward':
        rewards = R + self.args.entropy_coeff * entropies
    elif self.args.entropy_mode == 'regularizer':
        rewards = R * np.ones_like(entropies)
    else:
        raise NotImplementedError(
            f'Unknown entropy mode: {self.args.entropy_mode}')

    return rewards, hidden
def evaluate(self, source, dag, max_num=None):
    self.shared.eval()
    self.controller.eval()

    if max_num is None:
        max_num = source.size(0)
    else:
        max_num *= self.max_length
    data = source[:max_num]

    total_loss = 0
    hidden = None

    pbar = range(0, data.size(0) - 1, self.max_length)
    for count, idx in enumerate(pbar):
        inputs, targets = self.get_batch(data, idx)
        output, hidden, _ = self.shared(inputs,
                                        dag,
                                        hidden=hidden,
                                        is_train=False)
        output_flat = output.view(-1, self.dataset.num_tokens)
        total_loss += len(inputs) * self.ce(output_flat, targets).data

    val_loss = utils.to_item(total_loss) / len(data)
    ppl = math.exp(val_loss)
    return ppl
def get_reward(self, dag, entropies):
    """Computes the perplexity of a single sampled model on a minibatch of
    validation data.
    """
    if not isinstance(entropies, np.ndarray):
        entropies = entropies.data.cpu().numpy()

    # Declare validation data here
    inputs = self.valid_queue[0]
    target = self.valid_queue[1]

    valid_ppl = self.get_loss(inputs, target, dag, mode='Valid')
    valid_ppl = utils.to_item(valid_ppl.data)

    # TODO: we don't know reward_c
    if self.args.ppl_square:
        # TODO: but we do know reward_c=80 in the previous paper
        R = self.args.reward_c / valid_ppl**2
    else:
        R = self.args.reward_c / valid_ppl

    if self.args.entropy_mode == 'reward':
        rewards = R + self.args.entropy_coeff * entropies
    elif self.args.entropy_mode == 'regularizer':
        rewards = R * np.ones_like(entropies)
    else:
        raise NotImplementedError(
            f'Unknown entropy mode: {self.args.entropy_mode}')

    return rewards
def _summarize_shared_train(self, total_loss, raw_total_loss):
    """Logs a set of training steps."""
    cur_loss = utils.to_item(total_loss) / self.args.log_step
    # NOTE(brendan): The raw loss, without adding in the activation
    # regularization terms, should be used to compute ppl.
    cur_raw_loss = utils.to_item(raw_total_loss) / self.args.log_step
    ppl = math.exp(cur_raw_loss)

    print(f'| epoch {self.epoch:3d} '
          f'| lr {self.shared_lr:4.2f} '
          f'| raw loss {cur_raw_loss:.2f} '
          f'| loss {cur_loss:.2f} '
          f'| ppl {ppl:8.2f}')

    # Tensorboard
    if self.tb is not None:
        self.tb.scalar_summary('shared/loss', cur_loss, self.shared_step)
        self.tb.scalar_summary('shared/perplexity', ppl, self.shared_step)
def _construct_micro_cnn_dags(prev_nodes, activations, func_names, num_blocks):
    """Constructs a set of DAGs based on the actions, i.e., previous nodes and
    activation functions, sampled from the controller/policy pi.

    This is tailored for CNN cells only, not the aforementioned RNN cells.

    Args:
        prev_nodes: Previous node actions from the policy.
        activations: Activations sampled from the policy.
        func_names: [normal_func_names, reduce_func_names]
        num_blocks: Number of blocks in the target CNN cell.

    Returns:
        A list of DAGs defined by the inputs.

        CNN cell DAGs are represented in the following way:

        1. The entire DAG is represented as a list of two elements:
           [Normal-Cell, Reduction-Cell].

        2. Each element is another list containing
           [(node_id1, node_id2, ops)] * num_blocks,
           where each entry represents node1 -- ops --> node2.

        3. Node 0 represents h(t-1), i.e. the previous layer's input, and
           node 1 represents h(t), i.e. the current input, so the actual
           index for the current block starts from 2.
    """
    dags = []

    for nodes, func_ids in zip(prev_nodes, activations):
        dag = []
        # compute the first node
        # dag.append(MicroNode(0, 2, func_names[func_ids[0]]))
        # dag.append(MicroNode(1, 2, func_names[func_ids[0]]))
        leaf_nodes = set(range(2, num_blocks + 2))

        # add following nodes
        for curr_idx, (prev_idx, func_id) in enumerate(zip(nodes, func_ids)):
            layer_id = curr_idx // 2 + 2
            _prev_idx = utils.to_item(prev_idx)
            if _prev_idx == layer_id:
                continue
            assert _prev_idx < layer_id, "Crucial logical error"
            dag.append(MicroNode(_prev_idx, layer_id, func_names[func_id]))
            leaf_nodes -= set([_prev_idx])

        # add leaf node connection with concat
        # for idx in leaf_nodes:
        #     dag.append(MicroNode(idx, num_blocks, 'concat'))
        dag.sort()
        dags.append(dag)

    return dags
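# A small sketch of the micro CNN cell encoding described above. MicroNode's
# field names and the op names below are assumptions made for this example
# only; the controller actions are made up, and only the list-of-edges shape
# is meant to be illustrative.
import collections

MicroNode = collections.namedtuple('MicroNode', ['prev_id', 'curr_id', 'op'])
func_names = ['sep_conv_3x3', 'sep_conv_5x5', 'max_pool_3x3', 'identity']

# Two blocks -> four (previous node, op) decisions: two input edges per block.
prev_nodes = [0, 1, 2, 0]   # which earlier node each edge starts from
activations = [0, 2, 3, 1]  # which op each edge applies

dag, num_blocks = [], 2
for curr_idx, (prev_idx, func_id) in enumerate(zip(prev_nodes, activations)):
    layer_id = curr_idx // 2 + 2  # block indices start at 2 (0 and 1 are cell inputs)
    if prev_idx == layer_id:
        continue
    dag.append(MicroNode(prev_idx, layer_id, func_names[func_id]))
dag.sort()

print(dag)
# [MicroNode(prev_id=0, curr_id=2, op='sep_conv_3x3'),
#  MicroNode(prev_id=0, curr_id=3, op='sep_conv_5x5'),
#  MicroNode(prev_id=1, curr_id=2, op='max_pool_3x3'),
#  MicroNode(prev_id=2, curr_id=3, op='identity')]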
def _summarize_shared_train(self, total_loss, raw_total_loss):
    """Logs a set of training steps."""
    cur_loss = utils.to_item(total_loss) / self.args.log_step
    cur_raw_loss = utils.to_item(raw_total_loss) / self.args.log_step
    try:
        ppl = math.exp(cur_raw_loss)
    except OverflowError as e:
        # math.exp raises OverflowError (not RuntimeError) when the loss is
        # too large; fall back to inf so the logging below still works.
        print(f"Got error {e}")
        ppl = float('inf')

    logger.info(f'| epoch {self.epoch:3d} '
                f'| lr {self.shared_lr:4.2f} '
                f'| raw loss {cur_raw_loss:.2f} '
                f'| loss {cur_loss:.2f} '
                f'| ppl {ppl:8.2f}')

    # Tensorboard
    if self.tb is not None:
        self.tb.scalar_summary('shared/loss', cur_loss, self.shared_step)
        self.tb.scalar_summary('shared/perplexity', ppl, self.shared_step)
def evaluate(self, test_iter, dag, name, batch_size=1, max_num=None):
    """Evaluate on the validation set.

    (lianqing) What is the data source here?

    NOTE: the validation set is used to compute the reward, but the test set
    is the same as the validation set.
    """
    self.shared.eval()
    self.controller.eval()
    acc = AverageMeter()

    # data = source[:max_num*self.max_length]
    total_loss = 0
    # pbar = range(0, data.size(0) - 1, self.max_length)
    count = 0
    while True:
        try:
            count += 1
            inputs, targets = next(test_iter)
        except StopIteration:
            print("========> finished evaluating one epoch <========")
            break
            # Unreachable after the break; leftover iterator restart:
            # test_iter = iter(self.test_data)
            # inputs, targets = next(test_iter)

        # Check what difference training the controller makes here.
        inputs = Variable(inputs.cuda())
        targets = Variable(targets.cuda())

        output = self.shared(inputs, dag, is_train=False)
        # Check whether self.ce is the right loss to use here.
        total_loss += len(inputs) * self.ce(output, targets).data
        ppl = math.exp(utils.to_item(total_loss) / (count + 1))
        acc.update(utils.get_accuracy(targets, output))

    val_loss = utils.to_item(total_loss) / count
    ppl = math.exp(val_loss)

    # TODO: these summaries are written for the RNN case and need to be
    # adapted for the CNN case.
    # self.tb.scalar_summary(f'eval/{name}_loss', val_loss, self.epoch)
    # self.tb.scalar_summary(f'eval/{name}_ppl', ppl, self.epoch)
    print(f'eval | loss: {val_loss:8.2f} | ppl: {ppl:8.2f} '
          f'| accuracy: {acc.avg:8.2f}')
def _evaluate_valid(dag):
    hidden_eval = self.shared.init_training(self.args.batch_size)
    inputs_eval, targets_eval = self.get_batch(self.valid_data,
                                               0,
                                               self.max_length,
                                               volatile=True)
    _, valid_loss_eval, _, _, _ = self.get_loss(inputs_eval,
                                                targets_eval,
                                                dag,
                                                hidden=hidden_eval)
    valid_loss_eval = utils.to_item(valid_loss_eval.data)
    valid_ppl_eval = math.exp(valid_loss_eval)
    return valid_loss_eval, valid_ppl_eval
def _clip_gradient(abs_max_grad, abs_max_hidden_norm):
    h1tohT = extra_out['hiddens']
    new_abs_max_hidden_norm = utils.to_item(
        h1tohT.norm(dim=-1).data.max())
    if new_abs_max_hidden_norm > abs_max_hidden_norm:
        abs_max_hidden_norm = new_abs_max_hidden_norm
        logger.info(f'max hidden {abs_max_hidden_norm}')
    abs_max_grad = _check_abs_max_grad(abs_max_grad, model)
    torch.nn.utils.clip_grad_norm(model.parameters(),
                                  self.args.shared_grad_clip)
    return abs_max_grad, abs_max_hidden_norm
def _evaluate_valid(dag):
    self.valid_data = iter(self._valid_data)
    inputs_eval, targets_eval = self.get_batch(self.valid_data,
                                               0,
                                               self.max_length,
                                               volatile=True)
    _, valid_loss_eval, _, extra_out = self.get_loss(inputs_eval,
                                                     targets_eval,
                                                     dag)
    valid_loss_eval = utils.to_item(valid_loss_eval.data)
    valid_acc_eval = extra_out['acc']
    return valid_loss_eval, valid_acc_eval
def _check_max_hidden(abs_max_hidden_norm, h1tohT):
    """Debugging function that checks if `h1tohT` contains a new largest
    hidden state (as measured by L2 norm), and returns the (potentially
    updated) largest hidden state L2 norm.
    """
    new_abs_max_hidden_norm = utils.to_item(
        h1tohT.norm(p=2, dim=-1).data.max())
    if new_abs_max_hidden_norm > abs_max_hidden_norm:
        abs_max_hidden_norm = new_abs_max_hidden_norm
        logger.info(f'max hidden {abs_max_hidden_norm}')
    return abs_max_hidden_norm
def evaluate(self, source, dag, name, batch_size=1, max_num=None):
    """Evaluate a dag (child model) on the validation set.

    PT: only if validation set data is passed in via `source` (compare to
    eval_once in the TensorFlow implementation).

    NOTE(brendan): We should not be using the test set to develop the
    algorithm (basic machine learning good practices).
    """
    self.shared.eval()
    self.controller.eval()

    data = source[:max_num * self.max_length]

    total_loss = 0
    hidden = self.shared.init_hidden(batch_size)

    pbar = range(0, data.size(0) - 1, self.max_length)
    for count, idx in enumerate(pbar):
        inputs, targets = self.get_batch(data, idx, volatile=True)
        output, hidden, _ = self.shared(inputs,
                                        dag,
                                        hidden=hidden,
                                        is_train=False)
        output_flat = output.view(-1, self.dataset.num_tokens)
        total_loss += len(inputs) * self.ce(output_flat, targets).data
        hidden.detach_()
        # PT: Nothing seems to be done with this per-step ppl (?)
        ppl = math.exp(
            utils.to_item(total_loss) / (count + 1) / self.max_length)

    val_loss = utils.to_item(total_loss) / len(data)
    ppl = math.exp(val_loss)

    self.tb.scalar_summary(f'eval/{name}_loss', val_loss, self.epoch)
    self.tb.scalar_summary(f'eval/{name}_ppl', ppl, self.epoch)
    logger.info(f'eval {name} | loss: {val_loss:8.2f} | ppl: {ppl:8.2f}')
    return val_loss, ppl
def evaluate(self, dag):
    """Evaluate on the validation set.

    NOTE(brendan): We should not be using the test set to develop the
    algorithm (basic machine learning good practices).
    """
    with torch.no_grad():
        total_loss = 0
        inputs = self.test_queue[0]
        targets = self.test_queue[1]
        output = self.shared(inputs, dag[0])
        total_loss = self.criterion_controller(output, targets).data

        test_mae = utils.to_item(total_loss)
        logger.info(f'dag = {dag}')
        logger.info(f'eval | test mae: {test_mae:8.2f}')
def get_reward(self, dag, entropies, hidden, valid_idx=0):
    """Computes the perplexity of a single sampled model on a minibatch of
    validation data.
    """
    if not isinstance(entropies, np.ndarray):
        entropies = entropies.data.cpu().numpy()

    if valid_idx:
        valid_idx = 0

    # self.valid_data.size() = [1152, 64]
    # self.max_length = 35 ("gradient w is computed using back-propagation
    # through time truncated to 35 time steps" - Section 2.2, 2nd paragraph)
    # inputs.size() = [35, 64]
    # targets.size() = 2240 = 35 * 64
    inputs, targets = self.get_batch(self.valid_data,
                                     valid_idx,
                                     self.max_length,
                                     volatile=True)
    valid_loss, hidden, _ = self.get_loss(inputs, targets, hidden, dag)
    # hidden.size() = [64, 1000] -> 64 is the minibatch size, 1000?
    valid_loss = utils.to_item(valid_loss.data)

    # torch.onnx.export(self.shared, inputs, "dag.onnx")

    # perplexity
    valid_ppl = math.exp(valid_loss)

    # TODO: we don't know reward_c
    if self.args.ppl_square:
        # TODO: but we do know reward_c=80 in the previous paper
        R = self.args.reward_c / valid_ppl**2
    else:
        R = self.args.reward_c / valid_ppl

    # entropies: numpy array with 23 values
    if self.args.entropy_mode == 'reward':
        rewards = R + self.args.entropy_coeff * entropies
    elif self.args.entropy_mode == 'regularizer':
        rewards = R * np.ones_like(entropies)
    else:
        raise NotImplementedError(
            f'Unknown entropy mode: {self.args.entropy_mode}')

    return rewards, hidden, valid_ppl
def get_reward(self, dags, entropies, inputs, targets):
    """Computes the dice score of a single sampled model on a minibatch of
    validation data.
    """
    if not isinstance(entropies, np.ndarray):
        entropies = entropies.data.cpu().numpy()

    score = self.get_score(inputs, targets, dags)
    # score = 1 - self.get_loss(inputs, targets, dags)
    print(score.item())
    R = utils.to_item(score.data)

    if self.args.entropy_mode == 'reward':
        rewards = R + self.args.entropy_coeff * entropies.mean()
    elif self.args.entropy_mode == 'regularizer':
        rewards = R * np.ones_like(entropies)
    else:
        raise NotImplementedError(
            f'Unknown entropy mode: {self.args.entropy_mode}')

    return rewards
def get_reward(self, dags, entropies, hidden, valid_idx=None):
    """Computes the reward of one or several sampled models on a minibatch
    of validation data.
    """
    if not isinstance(entropies, np.ndarray):
        entropies = entropies.data.cpu().numpy()

    if valid_idx is None:
        valid_idx = 0

    inputs, targets = self.get_batch(self.valid_data,
                                     valid_idx,
                                     self.max_length,
                                     volatile=True)
    _, valid_loss, _, hidden, _ = self.get_loss(inputs,
                                                targets,
                                                dags,
                                                hidden=hidden)
    valid_loss = utils.to_item(valid_loss.data)

    valid_ppl = math.exp(valid_loss)

    if self.args.ppl_square:
        R = self.args.reward_c / valid_ppl**2
    else:
        R = self.args.reward_c / valid_ppl

    if self.args.entropy_mode == 'reward':
        rewards = R + self.args.entropy_coeff * entropies
    elif self.args.entropy_mode == 'regularizer':
        rewards = R * np.ones_like(entropies)
    else:
        raise NotImplementedError(
            f'Unknown entropy mode: {self.args.entropy_mode}')

    return rewards, hidden
def _construct_dags(prev_nodes, activations, func_names, num_blocks):
    dags = []

    for nodes, func_ids in zip(prev_nodes, activations):
        dag = collections.defaultdict(list)

        # add first node
        dag[-1] = [Node(0, func_names[func_ids[0]])]
        dag[-2] = [Node(0, func_names[func_ids[0]])]

        # add following nodes
        for jdx, (idx, func_id) in enumerate(zip(nodes, func_ids[1:])):
            dag[utils.to_item(idx)].append(Node(jdx + 1, func_names[func_id]))

        leaf_nodes = set(range(num_blocks)) - dag.keys()

        # merge with avg
        for idx in leaf_nodes:
            dag[idx] = [Node(num_blocks, 'avg')]

        last_node = Node(num_blocks + 1, 'h[t]')
        dag[num_blocks] = [last_node]
        dags.append(dag)

    return dags
def _construct_dags(prev_nodes, activations, func_names, num_blocks):
    """Constructs a set of DAGs based on the actions, i.e., previous nodes and
    activation functions, sampled from the controller/policy pi.

    Args:
        prev_nodes: Previous node actions from the policy.
        activations: Activations sampled from the policy.
        func_names: Mapping from activation function names to functions.
        num_blocks: Number of blocks in the target RNN cell.

    Returns:
        A list of DAGs defined by the inputs.

        RNN cell DAGs are represented in the following way:

        1. Each element (node) in a DAG is a list of `Node`s.

        2. The `Node`s in the list dag[i] correspond to the subsequent nodes
           that take the output from node i as their own input.

        3. dag[-1] is the node that takes input from x^{(t)} and h^{(t - 1)}.
           dag[-1] always feeds dag[0].
           dag[-1] acts as if `w_xc`, `w_hc`, `w_xh` and `w_hh` are its
           weights.

        4. dag[N - 1] is the node that produces the hidden state passed to
           the next timestep. dag[N - 1] is also always a leaf node, and
           therefore is always averaged with the other leaf nodes and fed to
           the output decoder.
    """
    dags = []

    # nodes has shape [1, 11] and func_ids has shape [1, 12]; since prev_nodes
    # is [[1, 11]]-shaped and activations is [[1, 12]]-shaped, zipping them
    # yields a single pair, so in practice this outer loop runs only once.
    for nodes, func_ids in zip(prev_nodes, activations):
        dag = collections.defaultdict(list)

        # add first node
        dag[-1] = [Node(0, func_names[func_ids[0]])]
        dag[-2] = [Node(0, func_names[func_ids[0]])]

        # add following nodes; Node is defined in utils.py as
        # Node = collections.namedtuple('Node', ['id', 'name']).
        # dag holds all of the graph's information: each node's activation
        # function and its predecessor.
        for jdx, (idx, func_id) in enumerate(zip(nodes, func_ids[1:])):
            # utils.to_item() returns the scalar value of a tensor.
            dag[utils.to_item(idx)].append(Node(jdx + 1, func_names[func_id]))

        # Leaf nodes are nodes with no successors; a set guarantees the
        # indices are unique.
        leaf_nodes = set(range(num_blocks)) - dag.keys()

        # merge with avg: all leaf nodes feed a single avg node
        for idx in leaf_nodes:
            dag[idx] = [Node(num_blocks, 'avg')]

        # TODO(brendan): This is actually y^{(t)}. h^{(t)} is node N - 1 in
        # the graph, where N is the number of nodes. I.e., h^{(t)} takes
        # only one other node as its input.

        # last h[t] node
        last_node = Node(num_blocks + 1, 'h[t]')
        dag[num_blocks] = [last_node]
        dags.append(dag)

    return dags
def train_controller(self):
    """Fixes the shared parameters and updates the controller parameters.

    The controller is updated with a score function gradient estimator
    (i.e., REINFORCE), with the reward being c/valid_ppl, where valid_ppl
    is computed on a minibatch of validation data.

    A moving average baseline is used.

    The controller is trained for 2000 steps per epoch (i.e., first (Train
    Shared) phase -> second (Train Controller) phase).
    """
    model = self.controller
    model.train()
    # TODO(brendan): Why can't we call shared.eval() here? Leads to loss
    # being uniformly zero for the controller.
    # self.shared.eval()

    avg_reward_base = None
    baseline = None
    adv_history = []
    entropy_history = []
    reward_history = []

    hidden = self.shared.init_hidden(self.args.batch_size)
    total_loss = 0
    valid_idx = 0
    for step in range(self.args.controller_max_step):
        # sample models, need M=10?
        loss_avg = []
        for m in range(1):
            dags, log_probs, entropies = self.controller.sample(
                with_details=True)

            # calculate reward
            np_entropies = entropies.data.cpu().numpy()
            # NOTE(brendan): No gradients should be backpropagated to the
            # shared model during controller training, obviously.
            with _get_no_grad_ctx_mgr():
                rewards, hidden = self.get_reward(dags,
                                                  np_entropies,
                                                  hidden,
                                                  valid_idx)

            # hidden = hidden[-1].detach_()
            # should we reset immediately? like below
            hidden = self.shared.init_hidden(self.args.batch_size)

            # discount
            # if 1 > self.args.discount > 0:
            #     rewards = discount(rewards, self.args.discount)

            reward_history.extend(rewards)
            entropy_history.extend(np_entropies)

            # moving average baseline
            if baseline is None:
                baseline = rewards
            else:
                decay = self.args.ema_baseline_decay
                baseline = decay * baseline + (1 - decay) * rewards

            adv = rewards - baseline
            adv_history.extend(adv)

            # policy loss
            loss = -log_probs * utils.get_variable(adv,
                                                   self.cuda,
                                                   requires_grad=False)
            loss_avg.append(loss)
            # if self.args.entropy_mode == 'regularizer':
            #     loss -= self.args.entropy_coeff * entropies

        loss = torch.stack(loss_avg)
        loss = loss.sum()  # or loss.mean()

        # update
        self.controller_optim.zero_grad()
        loss.backward()

        if self.args.controller_grad_clip > 0:
            torch.nn.utils.clip_grad_norm(model.parameters(),
                                          self.args.controller_grad_clip)
        self.controller_optim.step()

        total_loss += utils.to_item(loss.data)

        if ((step % self.args.log_step) == 0) and (step > 0):
            self._summarize_controller_train(total_loss,
                                             adv_history,
                                             entropy_history,
                                             reward_history,
                                             avg_reward_base,
                                             dags)

            reward_history, adv_history, entropy_history = [], [], []
            total_loss = 0

        self.controller_step += 1

        prev_valid_idx = valid_idx
        valid_idx = ((valid_idx + self.max_length) %
                     (self.valid_data.size(0) - 1))
        # NOTE(brendan): Whenever we wrap around to the beginning of the
        # validation data, we reset the hidden states.
        if prev_valid_idx > valid_idx:
            hidden = self.shared.init_hidden(self.args.batch_size)
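# A minimal, self-contained sketch of the REINFORCE update with the moving
# average (EMA) baseline used in train_controller above. The decay value and
# the toy rewards/log-probs below are illustrative assumptions, not this
# repository's actual configuration.
import torch

def reinforce_step(log_probs, rewards, baseline, decay=0.95):
    """One policy-gradient step: update the EMA baseline, form advantages,
    and return the loss whose gradient is the REINFORCE estimator."""
    if baseline is None:
        baseline = rewards.clone()
    else:
        baseline = decay * baseline + (1 - decay) * rewards
    adv = rewards - baseline                  # centred reward
    loss = (-log_probs * adv.detach()).sum()  # no gradient through the advantage
    return loss, baseline

# Toy usage: three controller decisions with made-up rewards and a previous baseline.
log_probs = torch.log(torch.tensor([0.3, 0.5, 0.2], requires_grad=True))
rewards = torch.tensor([0.9, 0.9, 0.9])
loss, baseline = reinforce_step(log_probs, rewards,
                                baseline=torch.tensor([0.8, 0.8, 0.8]))
loss.backward()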