def embed(self, x: Union[batchers.Batch, numbers.Integral]) -> dy.Expression:
  if self.train and self.word_dropout > 0.0 and self.word_id_mask is None:
    batch_size = x.batch_size() if batchers.is_batched(x) else 1
    self.word_id_mask = [set(np.random.choice(self.vocab_size, int(self.vocab_size * self.word_dropout), replace=False))
                         for _ in range(batch_size)]
  emb_e = dy.parameter(self.embeddings)
  # single mode
  if not batchers.is_batched(x):
    if self.train and self.word_id_mask and x in self.word_id_mask[0]:
      ret = dy.zeros((self.emb_dim,))
    else:
      ret = dy.pick(emb_e, index=x)
      if self.fix_norm is not None:
        ret = dy.cdiv(ret, dy.l2_norm(ret))
        if self.fix_norm != 1:
          ret *= self.fix_norm
  # minibatch mode
  else:
    ret = dy.pick_batch(emb_e, x)
    if self.fix_norm is not None:
      ret = dy.cdiv(ret, dy.l2_norm(ret))
      if self.fix_norm != 1:
        ret *= self.fix_norm
    if self.train and self.word_id_mask and any(x[i] in self.word_id_mask[i] for i in range(x.batch_size())):
      dropout_mask = dy.inputTensor(np.transpose([[0.0]*self.emb_dim if x[i] in self.word_id_mask[i] else [1.0]*self.emb_dim
                                                  for i in range(x.batch_size())]),
                                    batched=True)
      ret = dy.cmult(ret, dropout_mask)
  if self.train and self.weight_noise > 0.0:
    ret = dy.noise(ret, self.weight_noise)
  return ret
def get_normalized_reps(self, embs, forward_lstm, backward_lstm, encode=False):
    word_reps = [dy.concatenate([forward_lstm.initial_state().transduce(emb)[-1],
                                 backward_lstm.initial_state().transduce(reversed(emb))[-1]])
                 for emb in embs]
    if not encode:
        return [dy.cdiv(rep, dy.l2_norm(rep)) for rep in word_reps]
    else:
        return [dy.cdiv(rep, dy.l2_norm(rep)).value() for rep in word_reps]
def embed(self, x):
  if self.train and self.word_dropout > 0.0 and self.word_id_mask is None:
    batch_size = x.batch_size() if xnmt.batcher.is_batched(x) else 1
    self.word_id_mask = [set(np.random.choice(self.vocab_size, int(self.vocab_size * self.word_dropout), replace=False))
                         for _ in range(batch_size)]
  # single mode
  if not xnmt.batcher.is_batched(x):
    if self.train and self.word_id_mask and x in self.word_id_mask[0]:
      ret = dy.zeros((self.emb_dim,))
    else:
      ret = self.embeddings[x]
      if self.fix_norm is not None:
        ret = dy.cdiv(ret, dy.l2_norm(ret))
        if self.fix_norm != 1:
          ret *= self.fix_norm
  # minibatch mode
  else:
    ret = self.embeddings.batch(x)
    if self.fix_norm is not None:
      ret = dy.cdiv(ret, dy.l2_norm(ret))
      if self.fix_norm != 1:
        ret *= self.fix_norm
    if self.train and self.word_id_mask and any(x[i] in self.word_id_mask[i] for i in range(x.batch_size())):
      dropout_mask = dy.inputTensor(np.transpose([[0.0]*self.emb_dim if x[i] in self.word_id_mask[i] else [1.0]*self.emb_dim
                                                  for i in range(x.batch_size())]),
                                    batched=True)
      ret = dy.cmult(ret, dropout_mask)
  if self.train and self.weight_noise > 0.0:
    ret = dy.noise(ret, self.weight_noise)
  return ret
def __cosine_loss(self, pred, gold):
    sn1 = dy.l2_norm(pred)
    sn2 = dy.l2_norm(gold)
    mult = dy.cmult(sn1, sn2)
    dot = dy.dot_product(pred, gold)
    div = dy.cdiv(dot, mult)
    vec_y = dy.scalarInput(2)
    res = dy.cdiv(1 - div, vec_y)
    return res
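
# A minimal standalone sketch (not part of the original class) checking the arithmetic
# of the cosine loss above: the result is (1 - cosine_similarity) / 2, so vectors pointing
# in the same direction give ~0 and opposite directions give ~1. Values are made up.
import dynet as dy

dy.renew_cg()
pred = dy.inputVector([1.0, 2.0, 3.0])
gold = dy.inputVector([2.0, 4.0, 6.0])  # same direction as pred
cos_sim = dy.cdiv(dy.dot_product(pred, gold),
                  dy.cmult(dy.l2_norm(pred), dy.l2_norm(gold)))
loss = dy.cdiv(1 - cos_sim, dy.scalarInput(2))
print(loss.scalar_value())  # ~0.0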
def on_calc_additional_loss(self, trg, generator, generator_loss):
  assert hasattr(generator, "losses"), "Must support multi sample encoder from generator."
  if self.policy_learning is None:
    return None
  ### Calculate reward
  rewards = []
  trg_counts = dy.inputTensor([t.len_unpadded() for t in trg], batched=True)
  # Iterate through all samples
  for i, (loss, actions) in enumerate(zip(generator.losses, self.compose_output)):
    reward = FactoredLossExpr()
    # Adding all reward from the translator
    for loss_key, loss_value in loss.get_nobackprop_loss().items():
      if loss_key == 'mle':
        reward.add_loss('mle', dy.cdiv(-loss_value, trg_counts))
      else:
        reward.add_loss(loss_key, -loss_value)
    if self.length_prior is not None:
      reward.add_loss('seg_lp', self.length_prior.log_ll(self.seg_size_unpadded[i]))
    rewards.append(dy.esum(list(reward.expr_factors.values())))
  ### Calculate losses
  return self.policy_learning.calc_loss(rewards)
def softmax(x):
    """
    Compute the softmax function over the rows of a matrix (implemented here
    with DyNet rather than TensorFlow).

    You might find dy.exp, dy.max_dim, dy.sum_cols and dy.colwise_add useful.
    (Many solutions are possible, so you may not need to use all of these
    functions.) Make sure to implement the numerical-stability fix of
    subtracting the row-wise maximum, as in the previous homework.

    Args:
        x: Expression with shape (n_samples, n_features). Note that feature
           vectors are represented by row vectors. (For simplicity, there is
           no need to handle 1-d input as in the previous homework.)
    Returns:
        out: Expression with shape (n_samples, n_features).
    """
    ### YOUR CODE HERE
    x_max = dy.max_dim(x, 1)
    x_sub = dy.colwise_add(x, -x_max)
    x_exp = dy.exp(x_sub)
    sum_exp = dy.colwise_add(dy.zeroes(x.dim()[0]), dy.sum_cols(x_exp))
    out = dy.cdiv(x_exp, sum_exp)
    ### END YOUR CODE
    return out
def test_item(model, sentence):
    seq = [model.wlookup[int(model.w2i.get(entry, 0))]
           for entry in sentence.preprocessed_sentence]
    if len(seq) > 0:
        encoded_sequence = encode_sequence(model, seq, model.sentence_rnn)
        global_max = max_pooling(encoded_sequence)
        global_min = average_pooling(encoded_sequence)  # note: despite the name, this holds the average-pooled representation
        if len(encoded_sequence) > 0:
            att_mlp_outputs = []
            for e in encoded_sequence:
                mlp_out = (model.attention_w * e) + model.attention_b
                att_mlp_outputs.append(mlp_out)
            lst = []
            for o in att_mlp_outputs:
                lst.append(dy.exp(dy.sum_elems(dy.cmult(o, model.att_context))))
            sum_all = dy.esum(lst)
            probs = [dy.cdiv(e, sum_all) for e in lst]
            att_context = dy.esum([dy.cmult(p, h) for p, h in zip(probs, encoded_sequence)])
            context = dy.concatenate([att_context, global_max, global_min])
            y_pred = dy.logistic((model.mlp_w * context) + model.mlp_b)
            sentence.prediction_result = y_pred.scalar_value()
            dy.renew_cg()
            return sentence.prediction_result
    return 0
def transform(self, input_expr: dy.Expression, mask: Optional[batchers.Mask]=None):
  """
  Apply batch norm.

  Args:
    input_expr: input
    mask: compute statistics only over unmasked parts of the input expression
  """
  dim_in = input_expr.dim()
  param_bn_gamma = dy.parameter(self.gamma)
  param_bn_beta = dy.parameter(self.beta)
  if self.train:
    num_unmasked = 0
    if mask is not None:
      input_expr = set_masked_to_mean(mask, input_expr, self.time_first)
      num_unmasked = (mask.np_arr.size - np.count_nonzero(mask.np_arr)) * broadcast_factor(mask, input_expr)
    bn_mean = dy.moment_dim(input_expr, self.get_stat_dimensions(), 1, True, num_unmasked)
    # negated batch mean, reshaped so that adding it below centers the input
    neg_bn_mean_reshaped = -dy.reshape(bn_mean, self.get_normalizer_dimensionality())
    self.population_running_mean += (-BN_MOMENTUM) * self.population_running_mean + BN_MOMENTUM * bn_mean.npvalue()
    bn_std = dy.std_dim(input_expr, self.get_stat_dimensions(), True, num_unmasked)
    self.population_running_std += (-BN_MOMENTUM) * self.population_running_std + BN_MOMENTUM * bn_std.npvalue()
  else:
    neg_bn_mean_reshaped = -dy.reshape(dy.inputVector(self.population_running_mean), self.get_normalizer_dimensionality())
    bn_std = dy.inputVector(self.population_running_std)
  bn_numerator = input_expr + neg_bn_mean_reshaped
  bn_xhat = dy.cdiv(bn_numerator, dy.reshape(bn_std, self.get_normalizer_dimensionality()) + BN_EPS)
  bn_y = dy.cmult(param_bn_gamma, bn_xhat) + param_bn_beta  # y = gamma * xhat + beta
  dim_out = bn_y.dim()
  self.save_processed_arg("population_running_mean", self.population_running_mean)
  self.save_processed_arg("population_running_std", self.population_running_std)
  assert dim_out == dim_in
  return bn_y
def set_masked_to_mean(mask, tensor_expr, time_first=False):
  """
  Set masked parts of the tensor expr to the mean of the unmasked parts.
  """
  if np.count_nonzero(mask.np_arr) == 0:
    return tensor_expr
  else:
    dim_before = tensor_expr.dim()
    reshape_size = mask_reshape_size(mask, tensor_expr.dim(), time_first)
    inv_mask_expr = dy.inputTensor(1.0 - np.reshape(mask.np_arr.transpose(), reshape_size), batched=True)
    unmasked = dy.cmult(tensor_expr, inv_mask_expr)
    unmasked_mean = unmasked
    while sum(unmasked_mean.dim()[0]) > 1:  # loop because mean_dim only supports reducing up to 2 dimensions at a time
      unmasked_mean = dy.mean_dim(unmasked_mean,
                                  list(range(min(2, len(unmasked_mean.dim()[0])))),
                                  unmasked_mean.dim()[1] > 1,
                                  n=1)  # this is mean without normalization == sum
    unmasked_mean = dy.cdiv(unmasked_mean,
                            dy.inputTensor(np.asarray([(mask.np_arr.size - np.count_nonzero(mask.np_arr))
                                                       * broadcast_factor(mask, tensor_expr)]),
                                           batched=False))
    mask_expr = dy.cmult(dy.inputTensor(np.reshape(mask.np_arr.transpose(), reshape_size), batched=True),
                         unmasked_mean)
    ret = unmasked + mask_expr
    assert ret.dim() == dim_before
    return ret
def l2_normalize(vector):
    square_sum = dy.sqrt(
        dy.bmax(
            dy.sum_elems(dy.square(vector)),
            np.finfo(float).eps * dy.ones((1))[0],
        ))
    return dy.cdiv(vector, square_sum)
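
# A minimal usage sketch for l2_normalize above; the input values are made up, and it
# relies on the same componentwise-division broadcasting the function itself uses.
# After normalization the result should have (approximately) unit L2 norm.
import dynet as dy
import numpy as np

dy.renew_cg()
v = dy.inputVector([3.0, 4.0])
unit = l2_normalize(v)
print(unit.npvalue())                  # ~[0.6, 0.8]
print(np.linalg.norm(unit.npvalue()))  # ~1.0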
def dycosine(query_vec, question_vec):
    num = dy.transpose(query_vec) * question_vec
    dem1 = dy.sqrt(dy.transpose(query_vec) * query_vec)
    dem2 = dy.sqrt(dy.transpose(question_vec) * question_vec)
    dem = dem1 * dem2
    return dy.cdiv(num, dem)
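
# A minimal usage sketch for dycosine above with made-up column vectors:
# orthogonal vectors give a cosine similarity of ~0, identical vectors ~1.
import dynet as dy

dy.renew_cg()
query = dy.inputVector([1.0, 0.0])
question = dy.inputVector([0.0, 1.0])
print(dycosine(query, question).npvalue())  # ~[[0.0]]
print(dycosine(query, query).npvalue())     # ~[[1.0]]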
def on_calc_additional_loss(self, reward):
  if not self.learn_segmentation:
    return None
  ret = LossBuilder()
  if self.length_prior_alpha > 0:
    reward += self.segment_length_prior * self.length_prior_alpha
  reward = dy.cdiv(reward - dy.mean_batches(reward), dy.std_batches(reward))
  # Baseline Loss
  if self.use_baseline:
    baseline_loss = []
    for i, baseline in enumerate(self.bs):
      baseline_loss.append(dy.squared_distance(reward, baseline))
    ret.add_loss("Baseline", dy.esum(baseline_loss))
  # Reinforce Loss
  lmbd = self.lmbd.get_value(self.warmup_counter)
  if lmbd > 0.0:
    reinforce_loss = []
    # Calculating the loss of the baseline and reinforce
    for i in range(len(self.segment_decisions)):
      ll = dy.pick_batch(self.segment_logsoftmaxes[i], self.segment_decisions[i])
      if self.use_baseline:
        r_i = reward - self.bs[i]
      else:
        r_i = reward
      reinforce_loss.append(dy.logistic(r_i) * ll)
    ret.add_loss("Reinforce", -dy.esum(reinforce_loss) * lmbd)
  # Total Loss
  return ret
def conditional2(self, joint, prior1):
    size1, size2 = joint.dim()[0]
    conditional = []
    for z in xrange(size1):
        z_copied = dy.concatenate([prior1[z] for _ in xrange(size2)])
        conditional.append(dy.cdiv(dy.pick(joint, z), z_copied))
    return conditional
def pz(self, eq):
    """
    Gumbel softmax on distribution over z.
    """
    W = dy.parameter(self.W)
    prob = dy.softmax(W * eq)
    gumbel = dy.random_gumbel(self.num_clusters)
    # y = []
    # denom = []
    # for z in range(self.num_clusters):
    #     pi_i = prob[z]
    #     g_i = gumbel[z]
    #     val = dy.exp((dy.log(pi_i) + g_i) / self.temp)
    #     denom.append(val)
    # denom = dy.esum(denom)
    # for z in range(self.num_clusters):
    #     pi_i = prob[z]
    #     g_i = gumbel[z]
    #     numerator = dy.exp((dy.log(pi_i) + g_i) / self.temp)
    #     y.append(dy.cdiv(numerator, denom))
    logits = dy.softmax(dy.cdiv(dy.esum([prob, gumbel]), dy.inputVector([self.temp])))
    # logits = dy.concatenate(y)
    # print(np.max(logits.npvalue()))
    return logits
def eval_dict_dataset(dataset, net, shortlist, proj, parsed):
    ranks = []
    num_batches = len(dataset)
    if parsed:
        dim = dataset[0][0].shape[0]
        batch_size = 1
    else:
        dim, batch_size = dataset[0][0].shape
    for batch_num, data in enumerate(dataset):
        if parsed:
            words, definitions, _ = data
        else:
            words, definitions = data
        words = np.reshape(np.transpose(words), (batch_size, dim))
        dy.renew_cg()
        P = dy.parameter(proj)
        if parsed:
            outputs = net.do_parse_tree(definitions)
        else:
            outputs, _ = net(definitions)
        outputs = P * outputs
        normalised_outputs = outputs * dy.cdiv(dy.inputTensor([1]), dy.sqrt(dy.squared_norm(outputs)))
        normalised_outputs = np.reshape(np.transpose(normalised_outputs.npvalue()), (batch_size, dim))
        for output, word in zip(normalised_outputs, words):
            target_similarity = np.dot(word, output)
            similarities = np.dot(shortlist, output)
            rank = (similarities > target_similarity).sum()
            ranks.append(rank)
    total = len(ranks)
    accuracy10 = float(sum(int(r <= 10) for r in ranks)) / total
    accuracy100 = float(sum(int(r <= 100) for r in ranks)) / total
    return np.median(ranks), accuracy10, accuracy100
def cell_expr(self):
  """Returns:
       dy.Expression: cell state; if not given, it is inferred as inverse tanh of the main expression
  """
  if self._cell_expr is None:
    self._cell_expr = 0.5 * dy.log(dy.cdiv(1. + self._main_expr, 1. - self._main_expr))
  return self._cell_expr
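
# A small standalone sketch (made-up values) checking the identity used above:
# 0.5 * log((1 + tanh(c)) / (1 - tanh(c))) is artanh(tanh(c)) and recovers c.
import dynet as dy

dy.renew_cg()
c = dy.inputVector([0.3, -0.7])
h = dy.tanh(c)
recovered = 0.5 * dy.log(dy.cdiv(1. + h, 1. - h))
print(recovered.npvalue())  # ~[0.3, -0.7]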
def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
  src = src.as_tensor()

  src_height = src.dim()[0][0]
  src_width = src.dim()[0][1]
  # src_channels = 1
  batch_size = src.dim()[1]

  # convolution and pooling layers
  # src dim is ((40, 1000), 128)
  src = padding(src, self.filter_width[0]+3)
  l1 = dy.rectify(dy.conv2d(src, dy.parameter(self.filters1), stride=[self.stride[0], self.stride[0]], is_valid=True))  # ((1, 1000, 64), 128)
  pool1 = dy.maxpooling2d(l1, (1, 4), (1, 2), is_valid=True)  # ((1, 499, 64), 128)

  pool1 = padding(pool1, self.filter_width[1]+3)
  l2 = dy.rectify(dy.conv2d(pool1, dy.parameter(self.filters2), stride=[self.stride[1], self.stride[1]], is_valid=True))  # ((1, 499, 512), 128)
  pool2 = dy.maxpooling2d(l2, (1, 4), (1, 2), is_valid=True)  # ((1, 248, 512), 128)

  pool2 = padding(pool2, self.filter_width[2])
  l3 = dy.rectify(dy.conv2d(pool2, dy.parameter(self.filters3), stride=[self.stride[2], self.stride[2]], is_valid=True))  # ((1, 248, 1024), 128)
  pool3 = dy.max_dim(l3, d=1)

  my_norm = dy.l2_norm(pool3) + 1e-6
  output = dy.cdiv(pool3, my_norm)
  output = dy.reshape(output, (self.num_filters[2],), batch_size=batch_size)
  return ExpressionSequence(expr_tensor=output)
def attention(self, Q, K, V):
    weights = dy.softmax(dy.cdiv(Q * dy.transpose(K), dy.scalarInput(np.sqrt(self.d_k))), d=1)
    # v = weights.value()
    # np.save("att.txt", weights.value())
    return weights * V
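
# A minimal standalone sketch of the scaled dot-product attention computed above,
# with made-up Q, K, V of shape (seq_len, d_k) and an assumed self.d_k of 4; it
# reuses the same Q * K^T / sqrt(d_k) pattern rather than calling the method itself.
import dynet as dy
import numpy as np

dy.renew_cg()
Q = dy.inputTensor(np.random.rand(5, 4))  # (seq_len=5, d_k=4)
K = dy.inputTensor(np.random.rand(5, 4))
V = dy.inputTensor(np.random.rand(5, 4))
weights = dy.softmax(dy.cdiv(Q * dy.transpose(K), dy.scalarInput(np.sqrt(4))), d=1)
print((weights * V).dim())  # ((5, 4), 1): one attended vector per query position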
def _attend(self, query, mask=None):
    # query ((H), B)
    # mask  ((T, 1), B)
    query = unsqueeze(query, 0)
    # ((1, H), B) * ((H, T), B) -> ((1, T), B) -> ((T, 1), B)
    attn_scores = dy.cdiv(dy.transpose(query * self.context), dy.scalarInput(self.scale))
    if mask is not None:
        attn_scores = dy.cmult(attn_scores, mask[0]) + (mask[1] * dy.scalarInput(-1e9))
    return dy.softmax(attn_scores)  # ((T, 1), B)
def generate(self, src, forced_trg_ids):
  assert not forced_trg_ids
  assert batchers.is_batched(src) and src.batch_size() == 1, "batched generation not fully implemented"
  src = src[0]
  # Generating outputs
  outputs = []
  event_trigger.start_sent(src)
  embeddings = self.src_embedder.embed_sent(src)
  encodings = self.encoder.transduce(embeddings)
  if self.mode in ["avg_mlp", "final_mlp"]:
    if self.generate_per_step:
      assert self.mode == "avg_mlp", "final_mlp not supported with generate_per_step=True"
      scores = [dy.logistic(self.output_layer.transform(enc_i)) for enc_i in encodings]
    else:
      if self.mode == "avg_mlp":
        encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) * (1.0 / encodings.dim()[0][1])
      elif self.mode == "final_mlp":
        encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
      scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
  elif self.mode == "lin_sum_sig":
    enc_lin = []
    for step_i, enc_i in enumerate(encodings):
      step_linear = self.output_layer.transform(enc_i)
      if encodings.mask and np.sum(encodings.mask.np_arr[:, step_i]) > 0:
        step_linear = dy.cmult(step_linear, dy.inputTensor(1.0 - encodings.mask.np_arr[:, step_i], batched=True))
      enc_lin.append(step_linear)
    if self.generate_per_step:
      scores = [dy.logistic(enc_i) for enc_i in enc_lin]
    else:
      if encodings.mask:
        encoding_fixed_size = dy.cdiv(dy.esum(enc_lin),
                                      dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
      else:
        encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
      scores = dy.logistic(encoding_fixed_size)
  else:
    raise ValueError(f"unknown mode '{self.mode}'")
  if self.generate_per_step:
    output_actions = [np.argmax(score_i.npvalue()) for score_i in scores]
    score = np.sum([np.max(score_i.npvalue()) for score_i in scores])
    outputs.append(sent.SimpleSentence(words=output_actions,
                                       idx=src.idx,
                                       vocab=getattr(self.trg_reader, "vocab", None),
                                       score=score,
                                       output_procs=self.trg_reader.output_procs))
  else:
    scores_arr = scores.npvalue()
    output_actions = list(np.nonzero(scores_arr > 0.5)[0])
    score = np.sum(scores_arr[scores_arr > 0.5])
    outputs.append(sent.SimpleSentence(words=output_actions,
                                       idx=src.idx,
                                       vocab=getattr(self.trg_reader, "vocab", None),
                                       score=score,
                                       output_procs=self.trg_reader.output_procs))
  return outputs
def cal_context(self, s, selected=None):
    ws = self.cal_scores(s)
    if selected is None:
        return self.es_matrix * ws, ws
    selected_ws = dy.select_rows(ws, selected)
    selected_ws = dy.cdiv(selected_ws, dy.sum_elems(selected_ws))
    return dy.concatenate_cols([es[index] for index in selected]) * selected_ws, ws
def on_calc_additional_loss(self, translator_loss):
  if not self.learn_segmentation or self.segment_decisions is None:
    return None
  reward = -translator_loss["mle"]
  if not self.log_reward:
    reward = dy.exp(reward)
  reward = dy.nobackprop(reward)
  # Make sure that reward is not a scalar, but rather based on each batch item
  assert reward.dim()[1] == len(self.src_sent)
  # Mask
  enc_mask = self.enc_mask.get_active_one_mask().transpose() if self.enc_mask is not None else None
  # Compose the loss
  ret = LossBuilder()
  ## Length prior
  alpha = self.length_prior_alpha.value() if self.length_prior_alpha is not None else 0
  if alpha > 0:
    reward += self.segment_length_prior * alpha
  # reward z-score normalization
  if self.z_normalization:
    reward = dy.cdiv(reward - dy.mean_batches(reward), dy.std_batches(reward) + EPS)
  ## Baseline Loss
  if self.use_baseline:
    baseline_loss = []
    for i, baseline in enumerate(self.bs):
      loss = dy.squared_distance(reward, baseline)
      if enc_mask is not None:
        loss = dy.cmult(dy.inputTensor(enc_mask[i], batched=True), loss)
      baseline_loss.append(loss)
    ret.add_loss("Baseline", dy.esum(baseline_loss))
  if self.print_sample:
    print(dy.exp(self.segment_logsoftmaxes[i]).npvalue().transpose()[0])
  ## Reinforce Loss
  lmbd = self.lmbd.value()
  if lmbd > 0.0:
    reinforce_loss = []
    # Calculating the loss of the baseline and reinforce
    for i in range(len(self.segment_decisions)):
      ll = dy.pick_batch(self.segment_logsoftmaxes[i], self.segment_decisions[i])
      if self.use_baseline:
        r_i = reward - dy.nobackprop(self.bs[i])
      else:
        r_i = reward
      if enc_mask is not None:
        ll = dy.cmult(dy.inputTensor(enc_mask[i], batched=True), ll)
      reinforce_loss.append(r_i * -ll)
    loss = dy.esum(reinforce_loss) * lmbd
    ret.add_loss("Reinforce", loss)
  if self.confidence_penalty:
    ls_loss = self.confidence_penalty(self.segment_logsoftmaxes, enc_mask)
    ret.add_loss("Confidence Penalty", ls_loss)
  # Total Loss
  return ret
def softmax(x):
    ### YOUR CODE HERE
    x_max = dy.max_dim(x, 1)
    x_sub = dy.colwise_add(x, -x_max)
    x_exp = dy.exp(x_sub)
    x_sum = dy.sum_cols(x_exp)
    x_tmp = dy.zeroes(x.dim()[0])
    x_tmp = dy.colwise_add(x_tmp, x_sum)
    out = dy.cdiv(x_exp, x_tmp)
    ### END YOUR CODE
    return out
def selu(x):
    """
    :type x: dn.Expression
    :rtype: dn.Expression
    """
    positive = dn.rectify(x)
    positive_indicator = dn.rectify(dn.cdiv(positive, positive + epsilon))
    negative = -dn.rectify(-x)
    exp_negative = dn.exp(negative) - positive_indicator
    exp_negative_minus_alpha = exp_negative * alpha - alpha + positive_indicator * alpha
    # x > 0: x = x * scale; x < 0: x = (alpha * exp(x) - alpha) * scale
    ret = (positive + exp_negative_minus_alpha) * scale
    return ret
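
# A minimal usage sketch for selu above. The module-level epsilon/alpha/scale constants
# are not shown in this snippet; here they are assumed to be the standard SELU values.
import dynet as dn

epsilon = 1e-8
alpha = 1.6732632423543772
scale = 1.0507009873554805

dn.renew_cg()
x = dn.inputVector([-1.0, 0.0, 2.0])
print(selu(x).npvalue())  # ~[-1.11, 0.0, 2.10]: negatives follow scale*alpha*(exp(x)-1), positives scale*x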
def calc_nll(self, src, trg):
  event_trigger.start_sent(src)
  embeddings = self.src_embedder.embed_sent(src)
  encodings = self.encoder.transduce(embeddings)
  if not batchers.is_batched(trg):
    trg = batchers.mark_as_batch([trg])
  if self.mode in ["avg_mlp", "final_mlp"]:
    if self.mode == "avg_mlp":
      if encodings.mask:
        encoding_fixed_size = dy.cdiv(dy.sum_dim(encodings.as_tensor(), [1]),
                                      dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
      else:
        encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) / encodings.dim()[0][1]
    elif self.mode == "final_mlp":
      encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
    scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
  elif self.mode == "lin_sum_sig":
    enc_lin = []
    for step_i, enc_i in enumerate(encodings):
      step_linear = self.output_layer.transform(enc_i)
      if encodings.mask and np.sum(encodings.mask.np_arr[:, step_i]) > 0:
        step_linear = dy.cmult(step_linear, dy.inputTensor(1.0 - encodings.mask.np_arr[:, step_i], batched=True))
      enc_lin.append(step_linear)
    if encodings.mask:
      encoding_fixed_size = dy.cdiv(dy.esum(enc_lin),
                                    dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
    else:
      encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
    scores = dy.logistic(encoding_fixed_size)
  else:
    raise ValueError(f"unknown mode '{self.mode}'")
  idxs = ([], [])
  for batch_i in range(trg.batch_size()):
    for word in set(trg[batch_i]):
      if word not in {vocabs.Vocab.ES, vocabs.Vocab.SS}:
        idxs[0].append(word)
        idxs[1].append(batch_i)
  trg_scores = dy.sparse_inputTensor(idxs,
                                     values=np.ones(len(idxs[0])),
                                     shape=scores.dim()[0] + (scores.dim()[1],),
                                     batched=True)
  loss_expr = dy.binary_log_loss(scores, trg_scores)
  return loss_expr
def papx(self, example, turn_idx, state):
    """
    Calculate the cost of utterance and action given z.
    """
    encoder_input = example[0]
    labels = example[1]
    text = labels[0]
    goal_utterance = text[turn_idx]

    pa = self.pa(state)
    px = self.px(state, goal_utterance)

    #####################
    ## Action probability:
    #####################
    # -> Action Classifier:
    prev_text = text[:turn_idx]
    if prev_text == []:
        prev_text = [[self.vocab.index("<PAD>")]]
    agreement_space = encoder_input[0]  # of form [1, 4, 4]
    agreement = labels[1]
    cdata = [agreement_space, prev_text, agreement]
    self.classifier.predict_example(cdata)
    logits, _ = self.classifier.get_logits(cdata)

    label_idx = -999
    for idx in range(len(self.classifier.agreement_space)):
        if agreement == self.classifier.agreement_space[idx]:
            label_idx = idx

    action_prob = dy.softmax(logits)
    pa = dy.softmax(pa)

    # -> KL Divergence
    action_diverge = dy.cmult(pa, dy.log(dy.cdiv(pa, action_prob)))
    action_diverge = dy.sum_elems(action_diverge)

    ###################
    ## Text probability:
    ###################
    decoder_target = goal_utterance + [self.vocab.index("<END>")]
    losses = []
    for (log_prob, target) in zip(px, decoder_target):
        losses.append(dy.pickneglogsoftmax(log_prob, target))
    text_loss = dy.esum(losses)

    return dy.esum([action_diverge, text_loss])
def embed(self, x: Union[batchers.Batch, numbers.Integral]) -> dy.Expression:
  """
  Embed a single word in a sentence.

  :param x: A word id.
  :return: Embedded word.
  """
  ret = self._embed_word(x, batchers.is_batched(x))
  ## Applying Fix normalization
  if self.fix_norm is not None:
    ret = dy.cdiv(ret, dy.l2_norm(ret)) * self.fix_norm
  ## Weight noise only when training
  if self.train and self.weight_noise > 0.0:
    ret = dy.noise(ret, self.weight_noise)
  return ret
def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
  src = src.as_tensor()

  src_height = src.dim()[0][0]
  src_width = 1
  batch_size = src.dim()[1]

  W = dy.parameter(self.pW)
  b = dy.parameter(self.pb)

  src = dy.reshape(src, (src_height, src_width), batch_size=batch_size)  # ((276, 80, 3), 1)
  # convolution and pooling layers
  l1 = (W * src) + b
  output = dy.cdiv(l1, dy.sqrt(dy.squared_norm(l1)))
  return ExpressionSequence(expr_tensor=output)
def __call__(self, x, soft_labels=False, temperature=None):
    if self.mlp:
        W_mlp = dynet.parameter(self.W_mlp)
        b_mlp = dynet.parameter(self.b_mlp)
        act = self.mlp_activation
        x_in = act(W_mlp * x + b_mlp)
    else:
        x_in = x
    # from params to expressions
    W = dynet.parameter(self.W)
    b = dynet.parameter(self.b)
    logits = W * x_in + b
    if soft_labels and temperature:
        # calculate the soft labels smoothed with the temperature
        # see Distilling the Knowledge in a Neural Network
        elems = dynet.exp(logits / temperature)
        return dynet.cdiv(elems, dynet.sum_elems(elems))
    return self.act(logits)
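
# A small standalone sketch of the temperature-softened softmax computed in the
# soft_labels branch above, with made-up logits and temperature; a higher temperature
# flattens the distribution relative to the plain softmax.
import dynet

dynet.renew_cg()
logits = dynet.inputVector([2.0, 1.0, 0.1])
temperature = 2.0
elems = dynet.exp(logits / temperature)
soft = dynet.cdiv(elems, dynet.sum_elems(elems))
print(soft.npvalue())                    # smoother distribution
print(dynet.softmax(logits).npvalue())   # sharper, for comparison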
def show_attention_weights(model, sentence):
    seq = [model.wlookup[int(model.w2i.get(entry, 0))]
           for entry in sentence.preprocessed_sentence]
    if len(seq) > 0:
        encoded_sequence = encode_sequence(model, seq, model.sentence_rnn)
        if len(encoded_sequence) > 0:
            att_mlp_outputs = []
            for e in encoded_sequence:
                mlp_out = (model.attention_w * e) + model.attention_b
                att_mlp_outputs.append(mlp_out)
            lst = []
            for o in att_mlp_outputs:
                lst.append(dy.exp(dy.sum_elems(dy.cmult(o, model.att_context))))
            sum_all = dy.esum(lst)
            probs = [dy.cdiv(e, sum_all).scalar_value() for e in lst]
            return probs