def on_calc_additional_loss(self, translator_loss):
    if not self.learn_segmentation or self.segment_decisions is None:
        return None
    reward = -translator_loss["mle"]
    if not self.log_reward:
        reward = dy.exp(reward)
    reward = dy.nobackprop(reward)
    # Make sure the reward is not a scalar, but has one value per batch item
    assert reward.dim()[1] == len(self.src_sent)
    # Mask
    enc_mask = self.enc_mask.get_active_one_mask().transpose() if self.enc_mask is not None else None
    # Compose the loss
    ret = LossBuilder()
    ## Length prior
    alpha = self.length_prior_alpha.value() if self.length_prior_alpha is not None else 0
    if alpha > 0:
        reward += self.segment_length_prior * alpha
    # Reward z-score normalization
    if self.z_normalization:
        reward = dy.cdiv(reward - dy.mean_batches(reward), dy.std_batches(reward) + EPS)
    ## Baseline loss
    if self.use_baseline:
        baseline_loss = []
        for i, baseline in enumerate(self.bs):
            loss = dy.squared_distance(reward, baseline)
            if enc_mask is not None:
                loss = dy.cmult(dy.inputTensor(enc_mask[i], batched=True), loss)
            baseline_loss.append(loss)
        ret.add_loss("Baseline", dy.esum(baseline_loss))
    if self.print_sample:
        print(dy.exp(self.segment_logsoftmaxes[i]).npvalue().transpose()[0])
    ## REINFORCE loss
    lmbd = self.lmbd.value()
    if lmbd > 0.0:
        reinforce_loss = []
        # Calculating the loss of the baseline and REINFORCE
        for i in range(len(self.segment_decisions)):
            ll = dy.pick_batch(self.segment_logsoftmaxes[i], self.segment_decisions[i])
            if self.use_baseline:
                r_i = reward - dy.nobackprop(self.bs[i])
            else:
                r_i = reward
            if enc_mask is not None:
                ll = dy.cmult(dy.inputTensor(enc_mask[i], batched=True), ll)
            reinforce_loss.append(r_i * -ll)
        loss = dy.esum(reinforce_loss) * lmbd
        ret.add_loss("Reinforce", loss)
    if self.confidence_penalty:
        ls_loss = self.confidence_penalty(self.segment_logsoftmaxes, enc_mask)
        ret.add_loss("Confidence Penalty", ls_loss)
    # Total loss
    return ret
def reparameterize(mu, logvar):
    # Get z by reparameterization: z = mu + sigma * eps, eps ~ N(0, I).
    d = mu.dim()[0][0]
    eps = dy.random_normal(d)
    std = dy.exp(logvar * 0.5)
    return mu + dy.cmult(std, eps)
def reparameterize(self, mu, logvar):
    if self.training:
        std = dy.exp(logvar * 0.5)
        eps = dy.random_normal(dim=std.dim()[0], mean=0.0, stddev=1.0)
        return dy.cmult(eps, std) + mu
    else:
        # At evaluation time, skip sampling and return the mean deterministically.
        return mu
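
# A minimal usage sketch for the reparameterization trick above, assuming the
# free-function variant and a toy DyNet parameter collection; the parameter
# names below (mu_p, logvar_p) are illustrative, not from the source.
import dynet as dy

pc = dy.ParameterCollection()
mu_p = pc.add_parameters(8)
logvar_p = pc.add_parameters(8)

dy.renew_cg()
mu = dy.parameter(mu_p)
logvar = dy.parameter(logvar_p)
z = reparameterize(mu, logvar)  # z = mu + sigma * eps stays differentiable w.r.t. mu and logvar
print(z.npvalue().shape)        # (8,)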
def log_sum_exp(scores, n_tags):
    # Numerically stable log-sum-exp: shift the scores by their max before exponentiating.
    npval = scores.npvalue()
    argmax_score = np.argmax(npval)
    max_score_expr = dy.pick(scores, argmax_score)
    max_score_expr_broadcast = dy.concatenate([max_score_expr] * n_tags)
    return max_score_expr + dy.log(
        dy.sum_cols(dy.transpose(dy.exp(scores - max_score_expr_broadcast))))
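
# A quick hedged check of the log-sum-exp trick above: shifting by the max keeps
# the computation finite even where a naive dy.exp would overflow. The scores
# here are illustrative only.
import numpy as np
import dynet as dy

dy.renew_cg()
scores = dy.inputVector([1000.0, 999.0, 998.0])  # naive exp(1000) overflows float
lse = log_sum_exp(scores, n_tags=3)
print(lse.value())  # ~1000 + log(1 + e^-1 + e^-2), a finite number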
def test_item(model, sentence):
    seq = [
        model.wlookup[int(model.w2i.get(entry, 0))]
        for entry in sentence.preprocessed_sentence
    ]
    if len(seq) > 0:
        encoded_sequence = encode_sequence(model, seq, model.sentence_rnn)
        global_max = max_pooling(encoded_sequence)
        global_avg = average_pooling(encoded_sequence)
        if len(encoded_sequence) > 0:
            att_mlp_outputs = []
            for e in encoded_sequence:
                mlp_out = (model.attention_w * e) + model.attention_b
                att_mlp_outputs.append(mlp_out)
            lst = []
            for o in att_mlp_outputs:
                lst.append(dy.exp(dy.sum_elems(dy.cmult(o, model.att_context))))
            sum_all = dy.esum(lst)
            probs = [dy.cdiv(e, sum_all) for e in lst]
            att_context = dy.esum(
                [dy.cmult(p, h) for p, h in zip(probs, encoded_sequence)])
            context = dy.concatenate([att_context, global_max, global_avg])
            y_pred = dy.logistic((model.mlp_w * context) + model.mlp_b)
            sentence.prediction_result = y_pred.scalar_value()
            dy.renew_cg()
            return sentence.prediction_result
    return 0
def softmax(x):
    """
    Compute the softmax function in DyNet.

    You might find the functions dy.exp, dy.max_dim, dy.sum_cols, and
    dy.colwise_add useful. (Many solutions are possible, so you may not
    need to use all of these functions.) Recall also that many common
    operations are sugared (e.g. x * y does a matrix multiplication if
    x and y are both matrix expressions).

    Make sure to implement the numerical stability fix of subtracting the
    row-wise maximum before exponentiating, as in the previous homework!

    Args:
        x: dy.Expression with shape (n_samples, n_features). Note feature
           vectors are represented by row-vectors. (For simplicity, no need
           to handle 1-d input as in the previous homework.)
    Returns:
        out: dy.Expression with shape (n_samples, n_features). You need to
             construct this expression in this problem.
    """
    ### YOUR CODE HERE
    x_max = dy.max_dim(x, 1)
    x_sub = dy.colwise_add(x, -x_max)
    x_exp = dy.exp(x_sub)
    sum_exp = dy.colwise_add(dy.zeroes(x.dim()[0]), dy.sum_cols(x_exp))
    out = dy.cdiv(x_exp, sum_exp)
    ### END YOUR CODE
    return out
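
# A hedged sanity check for the softmax above (illustrative 2x3 input; the
# second row exercises the stability fix, since exp(1003) would overflow on
# its own).
import dynet as dy

dy.renew_cg()
x = dy.inputTensor([[1.0, 2.0, 3.0], [1001.0, 1002.0, 1003.0]])
out = softmax(x)
print(out.npvalue().sum(axis=1))  # each row sums to ~1.0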
def span_parser(self, sentence, is_train, elmo_embeddings, cur_word_index, gold=None):
    if gold is not None:
        assert isinstance(gold, ParseNode)

    lstm_outputs = self._featurize_sentence(sentence, is_train=is_train,
                                            elmo_embeddings=elmo_embeddings,
                                            cur_word_index=cur_word_index)

    encodings = []
    span_to_index = {}
    for start in range(0, len(sentence)):
        for end in range(start + 1, len(sentence) + 1):
            span_to_index[(start, end)] = len(encodings)
            encodings.append(self._get_span_encoding(start, end, lstm_outputs))

    label_log_probabilities = self._encodings_to_label_log_probabilities(encodings)

    total_loss = dy.zeros(1)
    if is_train:
        for start in range(0, len(sentence)):
            for end in range(start + 1, len(sentence) + 1):
                gold_label = gold.oracle_label(start, end)
                gold_label_index = self.label_vocab.index(gold_label)
                index = span_to_index[(start, end)]
                total_loss -= label_log_probabilities[gold_label_index][index]
        return None, total_loss
    else:
        label_log_probabilities_np = label_log_probabilities.npvalue()
        tree, additional_info = optimal_parser(label_log_probabilities_np,
                                               span_to_index,
                                               sentence,
                                               self.empty_label_index,
                                               self.label_vocab,
                                               gold)
        return tree, additional_info, dy.exp(label_log_probabilities).npvalue()
def train(self, mini_batch, num_train, k):
    words, pos_tags, chars, langs, signs, masks = mini_batch

    # Getting the last hidden layer from the BiLSTM.
    rnn_out = self.rnn_mlp(mini_batch, True)
    h_out = rnn_out[-1]
    t_out_d = dy.reshape(h_out, (h_out.dim()[0][0], h_out.dim()[1]))
    t_out = dy.transpose(t_out_d)

    # Calculating the kq values for NCE.
    kq = dy.scalarInput(float(k) / num_train)
    lkq = dy.log(kq)

    loss_values = []
    for i in range(len(langs)):
        for j in range(i + 1, len(langs)):
            if (langs[i] != langs[j]) and (signs[i] == 1 or signs[j] == 1):
                lu = -dy.squared_distance(t_out[i], t_out[j])
                denom = dy.log(dy.exp(lu) + kq)
                if signs[i] == signs[j]:  # both one
                    nom = lu
                else:
                    nom = lkq
                loss_values.append(denom - nom)

    err_value = 0
    if len(loss_values) > 0:
        err = dy.esum(loss_values) / len(loss_values)
        err.forward()
        err_value = err.value()
        err.backward()
        self.trainer.update()
    dy.renew_cg()
    return err_value
def _policy_shape_probs(self, prob_dist):
    # TODO: this is specific to Alchemy
    num_actions = len(self.output_action_vocabulary) - 1
    num_locations = len(self.output_location_vocabulary) - 1
    num_arguments = len(self.output_argument_vocabulary) - 1
    new_probdist = dy.zeros(prob_dist.dim()[0])
    zeroes = numpy.zeros(num_locations * num_arguments)
    ones = numpy.ones(num_locations * num_arguments)
    eos_prob = prob_dist[self._all_output_vocabulary.lookup_index((EOS, NO_ARG, NO_ARG))]
    action_idx = 0
    for action in self.output_action_vocabulary:
        masks = numpy.concatenate(
            (numpy.repeat(zeroes, action_idx),
             ones,
             numpy.repeat(zeroes, num_actions - action_idx - 1)))
        actions_masks = dy.reshape(dy.inputTensor(masks),
                                   (num_actions * num_locations * num_arguments, 1))
        if action == EOS:
            new_probdist += dy.cmult(actions_masks, prob_dist) / 2.
        elif action == "push":
            new_probdist += dy.cmult(actions_masks, prob_dist) + eos_prob / (2. * 56.)
        elif action == "pop":
            new_probdist += dy.cmult(actions_masks, prob_dist)
        action_idx += 1  # advance the mask window to the next action's block

    if self.args.syntax_restricted:
        return dy.exp(dy.log_softmax(dy.cmult(new_probdist, prob_dist),
                                     restrict=self._valid_action_indices))
    else:
        return dy.softmax(dy.cmult(new_probdist, prob_dist))
def intra_sent_attend(self, vecs):
    numVecs = len(vecs)
    fVecs = [dt.tanh(self.SelIntraFW * v) for v in vecs]
    expE = []
    for i, fq in enumerate(fVecs):
        row = []
        for j, fc in enumerate(fVecs):
            row.append(
                dt.exp(
                    dt.dot_product(fq, fc) +
                    self.SelIntraBias[i - j + int(config.d["DIST_BIAS_DIM"] / 2)]))
        expE.append(row)

    invSumExpE = []
    for i in xrange(numVecs):
        invSumExpE.append(dt.pow(dt.esum(expE[i]), dt.scalarInput(-1)))

    alpha = []
    for i in xrange(numVecs):
        s = dt.esum([vecs[j] * expE[i][j] for j in xrange(numVecs)])
        alpha.append(s * invSumExpE[i])

    return [
        dt.tanh(self.SelIntraHW * dt.concatenate([v, a]))
        for v, a in zip(vecs, alpha)
    ]
def calc_loss(sent):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src = sent[0]
    trg = sent[1]

    # Initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    # Get the output of the first LSTM
    src_output = init_state_src.add_inputs([LOOKUP_SRC[x] for x in src])[-1].output()

    # Now compute the mean and standard deviation of the source hidden state.
    W_mean = dy.parameter(W_mean_p)
    V_mean = dy.parameter(V_mean_p)
    b_mean = dy.parameter(b_mean_p)

    W_var = dy.parameter(W_var_p)
    V_var = dy.parameter(V_var_p)
    b_var = dy.parameter(b_var_p)

    # The mean vector from the encoder.
    mu = mlp(src_output, W_mean, V_mean, b_mean)
    # This is the diagonal vector of the log covariance matrix from the encoder
    # (regarding it as the log variance makes the future implementation easier).
    log_var = mlp(src_output, W_var, V_var, b_var)

    # Compute KL[N(mu(x), sigma(x)) || N(0, I)]
    # = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_loss = -0.5 * dy.sum_elems(1 + log_var - dy.pow(mu, dy.inputVector([2])) - dy.exp(log_var))

    z = reparameterize(mu, log_var)

    # Now step through the output sentence
    all_losses = []
    current_state = LSTM_TRG_BUILDER.initial_state().set_s([z, dy.tanh(z)])
    prev_word = trg[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for next_word in trg[1:]:
        # Feed the previous word into the decoder LSTM
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()
        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        all_losses.append(dy.pickneglogsoftmax(s, next_word))
        prev_word = next_word

    softmax_loss = dy.esum(all_losses)
    return kl_loss, softmax_loss
def decomp_attend(self, vecsA, vecsB):
    # Fq^T Fc -> could be expedited using native matrix/tensor multiplication
    Fq = vecsA  # the original word vectors, not yet passed through a NN as in Eq. 1;
                # would need a function F
    Fc = vecsB  # would need a function F

    expE = []
    for fq in Fq:
        row = []
        for fc in Fc:
            row.append(dt.exp(dt.dot_product(fq, fc)))
        expE.append(row)

    invSumExpEi = []
    for i in xrange(len(Fq)):
        invSumExpEi.append(dt.pow(dt.esum(expE[i]), dt.scalarInput(-1)))

    invSumExpEj = []
    for j in xrange(len(Fc)):
        invSumExpEj.append(
            dt.pow(dt.esum([expE[i][j] for i in xrange(len(Fq))]),
                   dt.scalarInput(-1)))

    beta = []
    for i in xrange(len(Fq)):
        s = dt.esum([Fc[j] * expE[i][j] for j in xrange(len(Fc))])
        beta.append(s * invSumExpEi[i])

    alpha = []
    for j in xrange(len(Fc)):
        # Aggregate the question vectors Fq here (using Fc[j] would merely
        # rescale Fc[j] by a constant, since it does not vary with i).
        s = dt.esum([Fq[i] * expE[i][j] for i in xrange(len(Fq))])
        alpha.append(s * invSumExpEj[j])

    # Compare
    v1i = [
        dt.logistic(dt.concatenate([Fq[i], beta[i]])) for i in xrange(len(Fq))
    ]  # would need a function G
    v2j = [
        dt.logistic(dt.concatenate([Fc[j], alpha[j]])) for j in xrange(len(Fc))
    ]  # would need a function G

    # Aggregate
    v1 = dt.esum(v1i)
    v2 = dt.esum(v2j)

    return dt.dot_product(v1, v2)
def log_sum_exp(scores):
    npval = scores.npvalue()
    argmax_score = np.argmax(npval)
    max_score_expr = dy.pick(scores, argmax_score)
    max_score_expr_broadcast = dy.concatenate([max_score_expr] * self.dim_output)
    return max_score_expr + dy.log(
        dy.sum_elems(
            dy.transpose(dy.exp(scores - max_score_expr_broadcast))))
def log_sum_exp(tag_score_arr):
    argmax = np.argmax(tag_score_arr.value())
    max_score = tag_score_arr[argmax]
    score = max_score
    max_arr = dynet.concatenate(
        [max_score for i in range(len(self.pos) + 2)])
    score += dynet.log(
        dynet.sum_dim(dynet.exp(tag_score_arr - max_arr), [0]))
    return score
def loss_function(recon_x, x, mu, logvar):
    # Equivalent to torch.nn.functional.binary_cross_entropy(.., size_average=False)
    BCE = dy.binary_log_loss(recon_x, x)
    # See Appendix B from the VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    # KLD = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * dy.sum_elems(1 + logvar - dy.pow(mu, dy.scalarInput(2)) - dy.exp(logvar))
    return BCE + KLD
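
# A hedged sanity check for the KLD term above: with mu = 0 and logvar = 0 the
# encoder distribution is exactly N(0, I), so the KL divergence must be zero.
import dynet as dy

dy.renew_cg()
mu = dy.inputVector([0.0, 0.0])
logvar = dy.inputVector([0.0, 0.0])
kld = -0.5 * dy.sum_elems(1 + logvar - dy.pow(mu, dy.scalarInput(2)) - dy.exp(logvar))
print(kld.value())  # 0.0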
def log_sum_exp(scores):
    npval = scores.npvalue()
    argmax_score = np.argmax(npval)
    max_score_expr = dy.pick(scores, argmax_score)
    max_score_expr_broadcast = dy.concatenate([max_score_expr] * self.tagset_size)
    return max_score_expr + dy.log(
        dy.sum_dim(
            dy.transpose(dy.exp(scores - max_score_expr_broadcast)), [1]))
def calc_loss(self, policy):
    if self.weight < 1e-8:
        return None
    neg_entropy = []
    for i, ll in enumerate(policy):
        if self.valid_pos is not None:
            ll = dy.pick_batch_elems(ll, self.valid_pos[i])
        loss = dy.sum_batches(dy.sum_elems(dy.cmult(dy.exp(ll), ll)))
        neg_entropy.append(dy.sum_batches(loss))
    return self.weight * dy.esum(neg_entropy)
def log_sum_exp(scores):
    npval = scores.npvalue()
    argmax_score = np.argmax(npval)
    max_score_expr = dynet.pick(scores, argmax_score)
    max_score_expr_broadcast = dynet.concatenate([max_score_expr] * (self.n_tags + 2))
    return max_score_expr + dynet.log(
        dynet.sum_cols(
            dynet.transpose(
                dynet.exp(scores - max_score_expr_broadcast))))
def softmax(x):
    ### YOUR CODE HERE
    x_max = dy.max_dim(x, 1)
    x_sub = dy.colwise_add(x, -x_max)
    x_exp = dy.exp(x_sub)
    x_sum = dy.sum_cols(x_exp)
    x_tmp = dy.zeroes(x.dim()[0])
    x_tmp = dy.colwise_add(x_tmp, x_sum)
    out = dy.cdiv(x_exp, x_tmp)
    ### END YOUR CODE
    return out
def selu(x):
    """
    :type x: dn.Expression
    :rtype: dn.Expression
    """
    positive = dn.rectify(x)
    positive_indicator = dn.rectify(dn.cdiv(positive, positive + epsilon))
    negative = -dn.rectify(-x)
    exp_negative = dn.exp(negative) - positive_indicator
    exp_negative_minus_alpha = exp_negative * alpha - alpha + positive_indicator * alpha
    # x > 0: x = x * scale; x < 0: x = (alpha * exp(x) - alpha) * scale
    ret = (positive + exp_negative_minus_alpha) * scale
    return ret
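
# A hedged spot-check of selu above against the closed form
# scale * (x if x > 0 else alpha * (exp(x) - 1)), assuming the module-level
# constants that the function refers to take the standard SELU values.
import numpy as np
import dynet as dn

alpha, scale, epsilon = 1.6732632423543772, 1.0507009873554805, 1e-8

dn.renew_cg()
xv = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
got = selu(dn.inputVector(list(xv))).npvalue()
want = scale * np.where(xv > 0, xv, alpha * (np.exp(xv) - 1.0))
print(np.allclose(got, want, atol=1e-5))  # True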
def marginals(self, inside_chart, outside_chart, lognormalizer,
              semiring=LogProbSemiring):
    marginals = {}
    for node in inside_chart:
        marginals[node] = dy.exp(
            semiring.division(
                semiring.product(inside_chart[node], outside_chart[node]),
                lognormalizer))
    return marginals
def __call__(self, logsoftmaxes, mask):
    strength = self.strength.value()
    if strength == 0:
        return 0
    neg_entropy = []
    for i, logsoftmax in enumerate(logsoftmaxes):
        loss = dy.cmult(dy.exp(logsoftmax), logsoftmax)
        if mask is not None:
            loss = dy.cmult(dy.inputTensor(mask[i], batched=True), loss)
        neg_entropy.append(loss)
    return strength * dy.sum_elems(dy.esum(neg_entropy))
def calc_reinforce_loss(words, tags, delta):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    word_reps = LSTM.transduce([LOOKUP[x] for x in words])

    # Softmax scores
    W = dy.parameter(W_sm)
    b = dy.parameter(b_sm)

    # Calculate the probability distribution over tags for each word
    scores = [dy.affine_transform([b, W, x]) for x in word_reps]
    probs = [dy.exp(dy.log_softmax(score)).npvalue() for score in scores]

    # Then take samples from the probability distribution
    samples = [np.random.choice(range(len(x)), p=x) for x in probs]

    # Calculate accuracy = reward
    correct = [sample == tag for sample, tag in zip(samples, tags)]
    r_i = float(sum(correct)) / len(correct)
    r = dy.constant((1), r_i)

    # Reward baseline for each word
    W_bl = dy.parameter(W_bl_p)
    b_bl = dy.parameter(b_bl_p)
    r_b = [
        dy.affine_transform([b_bl, W_bl, dy.nobackprop(x)]) for x in word_reps
    ]

    # We need to take the value in order to break the computation graph,
    # as the reward portion is trained separately and not backpropagated
    # through during the overall score
    rewards_over_baseline = [(r - dy.nobackprop(x)) for x in r_b]

    # The scores for training the baseline
    baseline_scores = [dy.square(r - x) for x in r_b]

    # Then calculate the REINFORCE scores
    reinforce_scores = [
        r_s * score for r_s, score in zip(rewards_over_baseline, scores)
    ]

    # For MIXER we want the first len(sent)-delta scores from cross-entropy,
    # then delta scores from REINFORCE
    if len(scores) > delta:
        mixer_scores = scores[:len(scores) - delta] + reinforce_scores[delta - 1:]
    else:
        mixer_scores = reinforce_scores
    return dy.esum(mixer_scores), dy.esum(baseline_scores)
def log_sum_exp_dim_0(x):
    # Numerically stable log-sum-exp over dimension 0
    dims = x.dim()
    max_score = dy.max_dim(x, 0)  # (dim_1, batch_size)
    if len(dims[0]) == 1:
        max_score_extend = max_score
    else:
        max_score_reshape = dy.reshape(max_score, (1, dims[0][1]), batch_size=dims[1])
        max_score_extend = dy.concatenate([max_score_reshape] * dims[0][0])
    x = x - max_score_extend
    exp_x = dy.exp(x)
    # (dim_1, batch_size); if there is no dim_1, returns ((1,), batch_size)
    log_sum_exp_x = dy.log(dy.mean_dim(exp_x, d=[0], b=False) * dims[0][0])
    return log_sum_exp_x + max_score
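
# A hedged check of log_sum_exp_dim_0 above against numpy, on a small
# illustrative matrix (single batch element).
import numpy as np
import dynet as dy

dy.renew_cg()
vals = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
got = log_sum_exp_dim_0(dy.inputTensor(vals)).npvalue()
want = np.log(np.exp(vals).sum(axis=0))
print(np.allclose(got, want))  # True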
def log_sum_exp(self, scores):
    r"""
    :param scores: observation scores for all possible tag sequences
    :return: \log(\sum(\exp(S(y))))
    """
    scores_val = scores.npvalue()
    max_idx = np.argmax(scores_val)
    # Introduce the max score to avoid overflow/underflow;
    # otherwise the results can be INF or -INF.
    # DyNet expression of the maximum score:
    max_score = dy.pick(scores, max_idx)
    max_score_broadcast = dy.concatenate([max_score] * (self.dim_ts_y + 2))
    # Shift the center of the exponential sum to (scores - max)
    return max_score + dy.log(
        dy.sum_elems(dy.transpose(dy.exp(scores - max_score_broadcast))))
def backward(self, word_vectors, label):
    dy.renew_cg()
    x = dy.inputTensor(word_vectors)
    y = dy.inputTensor(label)
    logit = self.build_graph(x)
    # q is the weight applied to the positive samples; for the formula, see
    # https://www.tensorflow.org/api_docs/python/tf/nn/weighted_cross_entropy_with_logits
    q = 15
    l = 1 + (q - 1) * y
    loss = (1 - y) * logit + l * (dy.log(1 + dy.exp(-dy.abs(logit))) +
                                  dy.rectify(-logit))
    res = loss.value()
    loss.backward()
    return res
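
# A hedged numeric note on the loss above: the dy.abs/rectify form is the
# numerically stable rewrite of (1 - y) * x + l * log(1 + exp(-x)); the two
# agree for moderate logits, as this toy check with plain numpy shows.
import numpy as np

x, y, q = 1.5, 1.0, 15
l = 1 + (q - 1) * y
stable = (1 - y) * x + l * (np.log1p(np.exp(-abs(x))) + max(-x, 0.0))
naive = (1 - y) * x + l * np.log1p(np.exp(-x))
print(np.isclose(stable, naive))  # True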
def pz(self, eq):
    """ Gumbel softmax on the distribution over z. """
    W = dy.parameter(self.W)
    prob = dy.softmax(W * eq)
    gumbel = dy.random_gumbel(self.num_clusters)
    y = []
    denom = []
    for z in range(self.num_clusters):
        pi_i = prob[z]
        g_i = gumbel[z]
        val = dy.exp((dy.log(pi_i) + g_i) / self.temp)
        denom.append(val)
    denom = dy.esum(denom)

    for z in range(self.num_clusters):
        pi_i = prob[z]
        g_i = gumbel[z]
        numerator = dy.exp((dy.log(pi_i) + g_i) / self.temp)
        y.append(dy.cdiv(numerator, denom))

    logits = dy.concatenate(y)
    return logits
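
# A hedged note on pz above: the two loops jointly compute a Gumbel-softmax
# sample, i.e. softmax((log(prob) + g) / temp) with g ~ Gumbel(0, 1). A
# vectorized sketch of the same computation (function name is illustrative):
import dynet as dy

def gumbel_softmax_sample(prob, temp):
    g = dy.random_gumbel(prob.dim()[0][0])
    return dy.softmax((dy.log(prob) + g) / temp)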
def calc_loss_basic(self, frames, label):
    # Renew the computation graph
    dy.renew_cg()

    # Initialize LSTM
    init_state_src = self.lstm_builder.initial_state()

    # Instantiate the params
    W_mean = dy.parameter(self.W_mean_p)
    V_mean = dy.parameter(self.V_mean_p)
    b_mean = dy.parameter(self.b_mean_p)
    W_var = dy.parameter(self.W_var_p)
    V_var = dy.parameter(self.V_var_p)
    b_var = dy.parameter(self.b_var_p)

    input_frames = dy.inputTensor(frames)
    output_label = label

    # Get the LSTM embeddings
    src_output = init_state_src.add_inputs(
        [frame for frame in input_frames])[-1].output()

    # Get the mean and diagonal log covariance from the encoder
    mu = self.mlp(src_output, W_mean, V_mean, b_mean)
    log_var = self.mlp(src_output, W_var, V_var, b_var)

    # Compute the KL divergence loss
    kl_loss = -0.5 * dy.sum_elems(1 + log_var - dy.pow(mu, dy.inputVector([2])) - dy.exp(log_var))

    # Reparameterize
    z = self.reparameterize(mu, log_var)

    W_sm = dy.parameter(self.W_sm_p)
    b_sm = dy.parameter(self.b_sm_p)

    # Calculate the reconstruction loss
    pred = dy.affine_transform([b_sm, W_sm, z])
    label_embedding = self.lookup[label]
    recons_loss = dy.pickneglogsoftmax(pred, label)

    return kl_loss, recons_loss
def max_margin_weighting(instance, pred_states, pred_scores_v):
    pred_scores_v = np.array(pred_scores_v)
    assert len(pred_states) == len(pred_scores_v)
    # assert len(instance.states) == len(pred_states[0])
    correct_denotations = []
    for i, states in enumerate(pred_states):
        if states[-1] == instance.states[-1]:
            correct_denotations.append(i)
    if not correct_denotations:
        weights = np.zeros_like(pred_scores_v)
    else:
        weights = dy.exp(
            dy.log_softmax(dy.inputVector(pred_scores_v),
                           correct_denotations)).npvalue()
    return weights, correct_denotations
def sample(self, x: dy.Expression, n: numbers.Integral, temperature: numbers.Real = 1.0):
    assert temperature != 0.0
    scores_expr = self.calc_log_probs(x)
    if temperature != 1.0:
        scores_expr *= 1.0 / temperature
        scores = dy.softmax(scores_expr).npvalue()
    else:
        scores = dy.exp(scores_expr).npvalue()
    # Numpy is very picky. If the sum is off even by 1e-8 it complains.
    scores /= sum(scores)
    a = range(scores.shape[0])
    samples = np.random.choice(a, (n,), replace=True, p=scores)
    r = []
    for word in samples:
        r.append((word, dy.pick(scores_expr, word)))
    return r