def test_pick_batch_elems(self):
    dy.renew_cg()
    x = dy.lookup_batch(self.p, [0, 1])
    y = dy.pick_batch_elems(x, [0])
    self.assertTrue(np.allclose(y.npvalue(), self.pval[0]))
    z = dy.pick_batch_elems(x, [0, 1])
    self.assertTrue(np.allclose(z.npvalue(), self.pval.T))
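# Hedged standalone sketch (not part of the original test file): the fixture names
# self.p and self.pval above are assumed to be a (2, 3) lookup parameter and the
# numpy array it was initialized from; under that assumption the same assertions
# can be reproduced outside the test class.
import dynet as dy
import numpy as np

model = dy.ParameterCollection()
pval = np.asarray([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
p = model.add_lookup_parameters((2, 3))
p.init_from_array(pval)

dy.renew_cg()
x = dy.lookup_batch(p, [0, 1])                                        # batch of 2 lookups, each a 3-vector
assert np.allclose(dy.pick_batch_elems(x, [0]).npvalue(), pval[0])    # a single element keeps its vector shape
assert np.allclose(dy.pick_batch_elems(x, [0, 1]).npvalue(), pval.T)  # batched npvalue puts the batch dim last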
def calc_loss(self, policy_reward, only_final_reward=True):
    loss = losses.FactoredLossExpr()
    ## Calculate baseline
    pred_reward, baseline_loss = self.calc_baseline_loss(policy_reward, only_final_reward)
    if only_final_reward:
        rewards = [policy_reward - pw_i for pw_i in pred_reward]
    else:
        rewards = [pr_i - pw_i for pr_i, pw_i in zip(policy_reward, pred_reward)]
    loss.add_loss("rl_baseline", baseline_loss)
    ## Z-Normalization
    rewards = dy.concatenate(rewards, d=0)
    if self.z_normalization:
        rewards_value = rewards.value()
        rewards_mean = np.mean(rewards_value)
        rewards_std = np.std(rewards_value) + 1e-10
        rewards = (rewards - rewards_mean) / rewards_std
    ## Calculate Confidence Penalty
    if self.confidence_penalty:
        cp_loss = self.confidence_penalty.calc_loss(self.policy_lls)
        loss.add_loss("rl_confpen", cp_loss)
    ## Calculate Reinforce Loss
    reinf_loss = []
    # Loop through all actions in one sequence
    for i, (policy, action) in enumerate(zip(self.policy_lls, self.actions)):
        # Main REINFORCE calculation
        reward = dy.pick(rewards, i)
        ll = dy.pick_batch(policy, action)
        if self.valid_pos is not None:
            ll = dy.pick_batch_elems(ll, self.valid_pos[i])
            reward = dy.pick_batch_elems(reward, self.valid_pos[i])
        reinf_loss.append(dy.sum_batches(ll * reward))
    loss.add_loss("rl_reinf", -self.weight * dy.esum(reinf_loss))
    ## Return the composed losses
    return loss
def calc_baseline_loss(self, reward, only_final_reward):
    pred_rewards = []
    cur_losses = []
    for i, state in enumerate(self.states):
        pred_reward = self.baseline.transform(dy.nobackprop(state))
        pred_rewards.append(dy.nobackprop(pred_reward))
        seq_reward = reward if only_final_reward else reward[i]
        if self.valid_pos is not None:
            pred_reward = dy.pick_batch_elems(pred_reward, self.valid_pos[i])
            act_reward = dy.pick_batch_elems(seq_reward, self.valid_pos[i])
        else:
            act_reward = seq_reward
        cur_losses.append(dy.sum_batches(dy.squared_distance(pred_reward, dy.nobackprop(act_reward))))
    return pred_rewards, dy.esum(cur_losses)
def output_and_loss(self, h_block, concat_t_block):
    concat_logit_block = self.output_affine(h_block, reconstruct_shape=False)
    bool_array = concat_t_block != 0
    indexes = np.argwhere(bool_array).ravel()
    concat_logit_block = dy.pick_batch_elems(concat_logit_block, indexes)
    concat_t_block = concat_t_block[bool_array]
    loss = dy.pickneglogsoftmax_batch(concat_logit_block, concat_t_block)
    return loss
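# Hedged standalone sketch of the padding-mask trick used in output_and_loss above
# (shapes and example values are assumptions, not from the original): positions whose
# target id is 0 are treated as padding, dropped from the batch with pick_batch_elems,
# and excluded from the softmax loss.
import dynet as dy
import numpy as np

dy.renew_cg()
logits = dy.inputTensor(np.random.rand(5, 6), batched=True)   # 5-way logits, batch of 6 positions
targets = np.array([3, 0, 1, 0, 4, 2])                        # 0 marks padded positions
keep = np.argwhere(targets != 0).ravel()
kept_logits = dy.pick_batch_elems(logits, keep)
loss = dy.pickneglogsoftmax_batch(kept_logits, targets[targets != 0])
print(loss.dim())                                             # ((1,), 4): one scalar loss per non-pad position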
def split_batch(self, X, h):
    (n_rows, _), batch = X.dim()
    l = list(range(batch))
    steps = batch // h
    output = []
    for i in range(0, batch, steps):
        indexes = l[i:i + steps]
        output.append(dy.pick_batch_elems(X, indexes))
    return output
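# Hedged usage sketch for split_batch (standalone, example shapes assumed): the same
# chunking can be done directly with pick_batch_elems, splitting a batch of 8 into
# h = 2 contiguous sub-batches of 4.
import dynet as dy
import numpy as np

dy.renew_cg()
X = dy.inputTensor(np.arange(32.0).reshape(4, 8), batched=True)   # dim ((4,), batch=8)
h = 2
batch = X.dim()[1]
step = batch // h
parts = [dy.pick_batch_elems(X, list(range(i, i + step))) for i in range(0, batch, step)]
print([part.dim() for part in parts])                              # [((4,), 4), ((4,), 4)]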
def calc_baseline_loss(self, rewards):
    # Take the average of the rewards across multiple samples
    avg_rewards = dy.average(rewards)
    pred_rewards = []
    loss = []
    for i, state in enumerate(self.states):
        pred_reward = self.baseline(dy.nobackprop(state))
        pred_rewards.append(dy.nobackprop(pred_reward))
        if self.valid_pos is not None:
            pred_reward = dy.pick_batch_elems(pred_reward, self.valid_pos[i])
            avg_reward = dy.pick_batch_elems(avg_rewards, self.valid_pos[i])
        else:
            avg_reward = avg_rewards
        loss.append(dy.sum_batches(dy.squared_distance(pred_reward, avg_reward)))
    return pred_rewards, dy.esum(loss)
def calc_loss(self, policy):
    if self.weight < 1e-8:
        return None
    neg_entropy = []
    for i, ll in enumerate(policy):
        if self.valid_pos is not None:
            ll = dy.pick_batch_elems(ll, self.valid_pos[i])
        loss = dy.sum_batches(dy.sum_elems(dy.cmult(dy.exp(ll), ll)))
        neg_entropy.append(dy.sum_batches(loss))
    return self.weight * dy.esum(neg_entropy)
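# Hedged numeric check of the negative-entropy term computed above (example values
# assumed): for a single log-probability vector ll, sum_elems(exp(ll) * ll) equals
# -H(p), which is the quantity the confidence penalty scales by self.weight.
import dynet as dy
import numpy as np

dy.renew_cg()
probs = np.array([0.7, 0.2, 0.1])
ll = dy.inputTensor(np.log(probs))
neg_entropy = dy.sum_elems(dy.cmult(dy.exp(ll), ll))
assert np.isclose(neg_entropy.value(), np.sum(probs * np.log(probs)))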
def calc_loss(self, rewards):
    loss = FactoredLossExpr()
    ## Z-Normalization
    if self.z_normalization:
        reward_batches = dy.concatenate_to_batch(rewards)
        mean_batches = dy.mean_batches(reward_batches)
        std_batches = dy.std_batches(reward_batches)
        rewards = [dy.cdiv(reward - mean_batches, std_batches) for reward in rewards]
    ## Calculate baseline
    if self.baseline is not None:
        pred_reward, baseline_loss = self.calc_baseline_loss(rewards)
        loss.add_loss("rl_baseline", baseline_loss)
    ## Calculate Confidence Penalty
    if self.confidence_penalty:
        loss.add_loss("rl_confpen", self.confidence_penalty.calc_loss(self.policy_lls))
    ## Calculate Reinforce Loss
    reinf_loss = []
    # Loop through all actions in one sequence
    for i, (policy, action_sample) in enumerate(zip(self.policy_lls, self.actions)):
        # Discount the reward by this timestep's baseline prediction if we use a baseline
        # (use a local list so the subtraction is not accumulated across timesteps)
        if self.baseline is not None:
            step_rewards = [reward - pred_reward[i] for reward in rewards]
        else:
            step_rewards = rewards
        # Main REINFORCE calculation
        sample_loss = []
        for action, reward in zip(action_sample, step_rewards):
            ll = dy.pick_batch(policy, action)
            if self.valid_pos is not None:
                ll = dy.pick_batch_elems(ll, self.valid_pos[i])
                reward = dy.pick_batch_elems(reward, self.valid_pos[i])
            sample_loss.append(dy.sum_batches(ll * reward))
        # Take the average of the losses across multiple samples
        reinf_loss.append(dy.esum(sample_loss) / len(sample_loss))
    loss.add_loss("rl_reinf", self.weight * -dy.esum(reinf_loss))
    ## Return the composed losses
    return loss
def calc_loss(self, policy_reward, results={}):
    """Calculate the policy network loss."""
    assert len(policy_reward) == len(self.states), "There should be a reward for every action taken"
    batch_size = self.states[0].dim()[1]
    loss = {}

    # Calculate the baseline loss of the reinforce loss for each timestep:
    # b = W_b * s + b_b
    # R = r - b
    # Also calculate the baseline loss
    # b = r_p (predicted)
    # loss_b = squared_distance(r_p - r_r)
    rewards = []
    baseline_loss = []
    units = np.zeros(batch_size)
    for i, state in enumerate(self.states):
        r_p = self.baseline.transform(dy.nobackprop(state))
        rewards.append(policy_reward[i] - r_p)
        if self.valid_pos[i] is not None:
            r_p = dy.pick_batch_elems(r_p, self.valid_pos[i])
            r_r = dy.pick_batch_elems(policy_reward[i], self.valid_pos[i])
            units[self.valid_pos[i]] += 1
        else:
            r_r = policy_reward[i]
            units += 1
        baseline_loss.append(dy.sum_batches(dy.squared_distance(r_p, r_r)))
    loss["rl_baseline"] = losses.LossExpr(dy.esum(baseline_loss), units)

    # Z Normalization
    # R = (R - mean(R)) / std(R)
    rewards = dy.concatenate(rewards, d=0)
    r_dim = rewards.dim()
    if self.z_normalization:
        rewards_shape = dy.reshape(rewards, (r_dim[0][0], r_dim[1]))
        rewards_mean = dy.mean_elems(rewards_shape)
        rewards_std = dy.std_elems(rewards_shape) + 1e-20
        rewards = (rewards - rewards_mean.value()) / rewards_std.value()
    rewards = dy.nobackprop(rewards)

    # Calculate Confidence Penalty
    if self.confidence_penalty:
        loss["rl_confpen"] = self.confidence_penalty.calc_loss(self.policy_lls)

    # Calculate Reinforce Loss
    # L = - sum([R-b] * pi_ll)
    reinf_loss = []
    units = np.zeros(batch_size)
    for i, (policy, action) in enumerate(zip(self.policy_lls, self.actions)):
        reward = dy.pick(rewards, i)
        ll = dy.pick_batch(policy, action)
        if self.valid_pos[i] is not None:
            ll = dy.pick_batch_elems(ll, self.valid_pos[i])
            reward = dy.pick_batch_elems(reward, self.valid_pos[i])
            units[self.valid_pos[i]] += 1
        else:
            units += 1
        reinf_loss.append(dy.sum_batches(dy.cmult(ll, reward)))
    loss["rl_reinf"] = losses.LossExpr(-dy.esum(reinf_loss), units)

    # Pack up + return
    return losses.FactoredLossExpr(loss)
import dynet as dy
import numpy as np

m = dy.Model()
p = m.add_lookup_parameters((2, 3))
npp = np.asarray([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
p.init_from_array(npp)

dy.renew_cg()
x = dy.lookup_batch(p, [0, 1])
y = dy.pick_batch_elems(x, [0])
z = dy.pick_batch_elem(x, 1)
yz = dy.pick_batch_elems(x, [0, 1])
w = dy.concatenate_to_batch([y, z])

print(x.npvalue())
print(y.npvalue())
print(yz.npvalue())
print(w.npvalue())

loss = dy.dot_product(y, z)
loss.forward()
loss.backward()
print(p.grad_as_array())
def cached_embedding_lookup(self, toks):
    chunks = map(tuple, zip(*toks))
    cache_is = [self.cache_locs[chunk] for chunk in chunks]
    return dynet.pick_batch_elems(self.cached_embeddings, cache_is)
def run(self, words, tags, heads, rels, masks_w, masks_t, isTrain):
    if config.biaffine:
        mlp_dep_bias = dy.parameter(self.mlp_dep_bias)
        mlp_dep = dy.parameter(self.mlp_dep)
        mlp_head_bias = dy.parameter(self.mlp_head_bias)
        mlp_head = dy.parameter(self.mlp_head)
        W_arc = dy.parameter(self.W_arc)
        W_rel = dy.parameter(self.W_rel)

    # tokens in the sentence and root
    seq_len = len(words) + 1

    punct_mask = np.array([1 if rel != self._punct_id else 0 for rel in rels], dtype=np.uint32)

    preds_arc = []
    preds_rel = []
    loss_arc = 0
    loss_rel = 0
    num_cor_arc = 0
    num_cor_rel = 0

    if isTrain:
        # embs_w = [self.lp_w[w if w < self._vocab_size_w else 0] * mask_w for w, mask_w in zip(words, masks_w)]
        # embs_t = [self.lp_t[t if t < self._vocab_size_t else 0] * mask_t for t, mask_t in zip(tags, masks_t)]
        embs_w = [self.lp_w[w] * mask_w for w, mask_w in zip(words, masks_w)]
        embs_t = [self.lp_t[t] * mask_t for t, mask_t in zip(tags, masks_t)]
        embs_w = [self.emb_root[0] * masks_t[-1]] + embs_w
        embs_t = [self.emb_root[1] * masks_w[-1]] + embs_t
    else:
        # embs_w = [self.lp_w[w if w < self._vocab_size_w else 0] for w in words]
        # embs_t = [self.lp_t[t if t < self._vocab_size_t else 0] for t in tags]
        embs_w = [self.lp_w[w] for w in words]
        embs_t = [self.lp_t[t] for t in tags]
        embs_w = [self.emb_root[0]] + embs_w
        embs_t = [self.emb_root[1]] + embs_t

    lstm_ins = [dy.concatenate([emb_w, emb_t]) for emb_w, emb_t in zip(embs_w, embs_t)]
    # lstm_outs = dy.concatenate_cols([self.emb_root[0]] + utils.bilstm(self.l2r_lstm, self.r2l_lstm, lstm_ins, self._pdrop))
    # lstm_outs = dy.concatenate_cols(utils.bilstm(self.LSTM_builders[0], self.LSTM_builders[1], lstm_ins, self._pdrop_lstm))
    lstm_outs = dy.concatenate_cols(utils.biLSTM(self.LSTM_builders, lstm_ins, None, self._pdrop_lstm, self._pdrop_lstm))

    # if isTrain:
    #     lstm_outs = dy.dropout(lstm_outs, self._pdrop)

    if config.biaffine:
        embs_dep, embs_head = \
            utils.leaky_relu(dy.affine_transform([mlp_dep_bias, mlp_dep, lstm_outs])), \
            utils.leaky_relu(dy.affine_transform([mlp_head_bias, mlp_head, lstm_outs]))

        if isTrain:
            embs_dep, embs_head = dy.dropout(embs_dep, self._pdrop_mlp), dy.dropout(embs_head, self._pdrop_mlp)

        dep_arc, dep_rel = embs_dep[:self._arc_dim], embs_dep[self._arc_dim:]
        head_arc, head_rel = embs_head[:self._arc_dim], embs_head[self._arc_dim:]

        logits_arc = utils.bilinear(dep_arc, W_arc, head_arc, self._arc_dim, seq_len,
                                    config.batch_size, 1,
                                    self.biaffine_bias_x_arc, self.biaffine_bias_y_arc)
    else:
        mlp = dy.parameter(self.mlp)
        mlp_bias = dy.parameter(self.mlp_bias)
        embs = utils.leaky_relu(dy.affine_transform([mlp_bias, mlp, lstm_outs]))

        if isTrain:
            embs = dy.dropout(embs, self._pdrop_mlp)

        embs_arc, embs_rel = embs[:self._arc_dim * 2], embs[self._arc_dim * 2:]

        W_r_arc = dy.parameter(self.V_r_arc)
        W_i_arc = dy.parameter(self.V_i_arc)
        bias_arc = dy.parameter(self.bias_arc)

        logits_arc = utils.biED(embs_arc, W_r_arc, W_i_arc, embs_arc, seq_len, 1, bias=bias_arc)

    # flat_logits_arc = dy.reshape(logits_arc[:][1:], (seq_len,), seq_len - 1)
    flat_logits_arc = dy.reshape(logits_arc, (seq_len,), seq_len)
    # flat_logits_arc = dy.pick_batch_elems(flat_logits_arc, [e for e in range(1, seq_len)])
    flat_logits_arc = dy.pick_batch_elems(flat_logits_arc, np.arange(1, seq_len, dtype='int32'))

    loss_arc = dy.pickneglogsoftmax_batch(flat_logits_arc, heads)

    if not isTrain:
        # msk = [1] * seq_len
        msk = np.ones((seq_len), dtype='int32')
        arc_probs = dy.softmax(logits_arc).npvalue()
        arc_probs = np.transpose(arc_probs)
        preds_arc = utils.arc_argmax(arc_probs, seq_len, msk, ensure_tree=True)
        # preds_arc = logits_arc.npvalue().argmax(0)
        cor_arcs = np.multiply(np.equal(preds_arc[1:], heads), punct_mask)
        num_cor_arc = np.sum(cor_arcs)

    if not config.las:
        return loss_arc, num_cor_arc, num_cor_rel

    if config.biaffine:
        logits_rel = utils.bilinear(dep_rel, W_rel, head_rel, self._rel_dim, seq_len,
                                    1, self._vocab_size_r,
                                    self.biaffine_bias_x_rel, self.biaffine_bias_y_rel)
    else:
        V_r_rel = dy.parameter(self.V_r_rel)
        V_i_rel = dy.parameter(self.V_i_rel)
        bias_rel = dy.parameter(self.bias_rel)

        logits_rel = utils.biED(embs_rel, V_r_rel, V_i_rel, embs_rel, seq_len, self._vocab_size_r, bias=bias_rel)

    # flat_logits_rel = dy.reshape(logits_rel[:][1:], (seq_len, self._vocab_size_r), seq_len - 1)
    flat_logits_rel = dy.reshape(logits_rel, (seq_len, self._vocab_size_r), seq_len)
    # flat_logits_rel = dy.pick_batch_elems(flat_logits_rel, [e for e in range(1, seq_len)])
    flat_logits_rel = dy.pick_batch_elems(flat_logits_rel, np.arange(1, seq_len, dtype='int32'))

    partial_rel_logits = dy.pick_batch(flat_logits_rel, heads if isTrain else preds_arc[1:])

    if isTrain:
        loss_rel = dy.sum_batches(dy.pickneglogsoftmax_batch(partial_rel_logits, rels))
    else:
        preds_rel = partial_rel_logits.npvalue().argmax(0)
        num_cor_rel = np.sum(np.multiply(np.equal(preds_rel, rels), cor_arcs))

    return loss_arc + loss_rel, num_cor_arc, num_cor_rel