Exemple #1
0
  def calc_loss(self, translator, src, trg):
    """REINFORCE loss (with optional baseline) over sampled search outputs.

    Fixes vs. original: returns the accumulated ``total_loss`` (the original
    returned only the last sample's ``loss``); removes a dead ``valid`` local;
    the no-baseline branch now uses names defined in this scope
    (``self.reward`` and ``search_output.logsoftmaxes``) instead of the
    undefined ``self.true_score``/``logsofts``.

    Args:
      translator: model exposing ``generate_search_output``.
      src: batched source sentences.
      trg: batched reference sentences.
    Returns:
      FactoredLossExpr accumulated over all search outputs.
    """
    search_outputs = translator.generate_search_output(src, self.search_strategy)
    sign = -1 if self.inv_eval else 1

    total_loss = FactoredLossExpr()
    for search_output in search_outputs:
      # Per-sentence metric scores act as the reward signal.
      self.eval_score = []
      for trg_i, sample_i in zip(trg, search_output.word_ids):
        # Removing EOS
        sample_i = self.remove_eos(sample_i.tolist())
        ref_i = trg_i.words[:trg_i.len_unpadded()]
        score = self.evaluation_metric.evaluate_one_sent(ref_i, sample_i)
        self.eval_score.append(sign * score)
      self.reward = dy.inputTensor(self.eval_score, batched=True)
      # Composing losses
      loss = FactoredLossExpr()
      if self.baseline is not None:
        baseline_loss = []
        losses = []
        for state, logsoft, mask in zip(search_output.state,
                                        search_output.logsoftmaxes,
                                        search_output.mask):
          bs_score = self.baseline.transform(state)
          baseline_loss.append(dy.squared_distance(self.reward, bs_score))
          loss_i = dy.cmult(logsoft, self.reward - bs_score)
          # Zero out padded positions.
          losses.append(dy.cmult(loss_i, dy.inputTensor(mask, batched=True)))
        loss.add_loss("reinforce", dy.sum_elems(dy.esum(losses)))
        loss.add_loss("reinf_baseline", dy.sum_elems(dy.esum(baseline_loss)))
      else:
        loss.add_loss("reinforce",
                      dy.sum_elems(dy.cmult(self.reward, dy.esum(list(search_output.logsoftmaxes)))))
      total_loss.add_factored_loss_expr(loss)
    return total_loss
Exemple #2
0
  def calc_loss(self,
                model: 'model_base.ConditionedModel',
                src: Union[sent.Sentence, 'batchers.Batch'],
                trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
    """REINFORCE-with-baseline loss over sampled search outputs.

    Fix: the original returned only the last per-sample ``loss`` instead of
    the accumulated ``total_loss``.
    """
    search_outputs = model.generate_search_output(src, self.search_strategy)
    sign = -1 if self.inv_eval else 1

    total_loss = losses.FactoredLossExpr()
    for search_output in search_outputs:
      # Calculate rewards: per-sentence metric scores (negated if inv_eval).
      eval_score = []
      for trg_i, sample_i in zip(trg, search_output.word_ids):
        # Removing EOS
        sample_i = self.remove_eos(sample_i.tolist())
        ref_i = trg_i.words[:trg_i.len_unpadded()]
        score = self.evaluation_metric.evaluate_one_sent(ref_i, sample_i)
        eval_score.append(sign * score)
      reward = dy.inputTensor(eval_score, batched=True)
      # Composing losses
      loss = losses.FactoredLossExpr()
      baseline_loss = []
      cur_losses = []
      for state, mask in zip(search_output.state, search_output.mask):
        # The baseline predicts the reward from the (non-backprop) state.
        bs_score = self.baseline.transform(dy.nobackprop(state.as_vector()))
        baseline_loss.append(dy.squared_distance(reward, bs_score))
        logsoft = model.decoder.scorer.calc_log_probs(state.as_vector())
        loss_i = dy.cmult(logsoft, reward - bs_score)
        # Zero out padded positions.
        cur_losses.append(dy.cmult(loss_i, dy.inputTensor(mask, batched=True)))
      loss.add_loss("reinforce", dy.sum_elems(dy.esum(cur_losses)))
      loss.add_loss("reinf_baseline", dy.sum_elems(dy.esum(baseline_loss)))
      # Total losses
      total_loss.add_factored_loss_expr(loss)
    return total_loss
Exemple #3
0
  def __call__(self, translator, dec_state, src, trg):
    """Sample a translation step-by-step and compute a REINFORCE loss.

    Rolls the decoder forward up to ``self.sample_length`` steps, sampling a
    word per step, then scores the samples against ``trg`` with
    ``self.evaluation_metric`` and composes the policy-gradient loss
    (optionally with a learned baseline).
    """
    # TODO: apply trg.mask ?
    samples = []
    logsofts = []
    self.bs = []  # per-step baseline predictions (only filled if use_baseline)
    done = [False for _ in range(len(trg))]
    for _ in range(self.sample_length):
      dec_state.context = translator.attender.calc_context(dec_state.rnn_state.output())
      if self.use_baseline:
        # Baseline sees a detached (nobackprop) projection of the state, so
        # baseline training does not push gradients into the decoder.
        h_t = dy.tanh(translator.decoder.context_projector(dy.concatenate([dec_state.rnn_state.output(), dec_state.context])))
        self.bs.append(self.baseline(dy.nobackprop(h_t)))
      logsoft = dy.log_softmax(translator.decoder.get_scores(dec_state))
      sample = logsoft.tensor_value().categorical_sample_log_prob().as_numpy()[0]
      # Keep track of previously sampled EOS
      sample = [sample_i if not done_i else Vocab.ES for sample_i, done_i in zip(sample, done)]
      # Appending and feeding in the decoder
      logsoft = dy.pick_batch(logsoft, sample)
      logsofts.append(logsoft)
      samples.append(sample)
      dec_state = translator.decoder.add_input(dec_state, translator.trg_embedder.embed(xnmt.batcher.mark_as_batch(sample)))
      # Check if we are done.
      done = list(six.moves.map(lambda x: x == Vocab.ES, sample))
      if all(done):
        break

    # Transpose: (steps, batch) -> (batch, steps) so each row is one sample.
    samples = np.stack(samples, axis=1).tolist()
    self.eval_score = []
    for trg_i, sample_i in zip(trg, samples):
      # Removing EOS
      try:
        idx = sample_i.index(Vocab.ES)
        sample_i = sample_i[:idx]
      except ValueError:
        pass
      try:
        idx = trg_i.words.index(Vocab.ES)
        trg_i.words = trg_i.words[:idx]
      except ValueError:
        pass
      # Calculate the evaluation score (0 for an empty sample).
      score = 0 if not len(sample_i) else self.evaluation_metric.evaluate_fast(trg_i.words, sample_i)
      self.eval_score.append(score)
    self.true_score = dy.inputTensor(self.eval_score, batched=True)
    loss = LossBuilder()

    if self.use_baseline:
      # Advantage-weighted log-probs; note the (baseline - reward) ordering.
      for i, (score, _) in enumerate(zip(self.bs, logsofts)):
        logsofts[i] = dy.cmult(logsofts[i], score - self.true_score)
      loss.add_loss("Reinforce", dy.sum_elems(dy.esum(logsofts)))

    else:
        loss.add_loss("Reinforce", dy.sum_elems(dy.cmult(-self.true_score, dy.esum(logsofts))))

    if self.use_baseline:
      # Regress the baseline towards the observed reward.
      baseline_loss = []
      for bs in self.bs:
        baseline_loss.append(dy.squared_distance(self.true_score, bs))
      loss.add_loss("Baseline", dy.sum_elems(dy.esum(baseline_loss)))
    return loss
Exemple #4
0
 def test_save_load_with_gradient(self):
     """Gradients accumulated on parameters must survive a save/populate round trip."""
     dy.renew_cg()
     # Run backward once so the parameters carry non-trivial gradients.
     dy.sum_elems(self.W1).backward()
     expected = {"W1": self.W1.grad_as_array(),
                 "W2": self.W2.grad_as_array()}
     # Round-trip the ParameterCollection through disk.
     self.m.save(self.file)
     self.m.populate(self.file)
     self.assertTrue(np.allclose(self.W1.grad_as_array(), expected["W1"]))
     self.assertTrue(np.allclose(self.W2.grad_as_array(), expected["W2"]))
Exemple #5
0
 def test_save_load_with_gradient(self):
     """Check that parameter gradients survive a ParameterCollection save/populate cycle."""
     # Make it so W1 has a gradient
     dy.renew_cg()
     dy.sum_elems(self.W1).backward()
     # Record gradients
     W1_grad = self.W1.grad_as_array()
     W2_grad = self.W2.grad_as_array()
     # Save the ParameterCollection
     self.m.save(self.file)
     # Populate
     self.m.populate(self.file)
     # Check that the gradients were saved
     self.assertTrue(np.allclose(self.W1.grad_as_array(), W1_grad))
     self.assertTrue(np.allclose(self.W2.grad_as_array(), W2_grad))
Exemple #6
0
def aggregate_masked_loss(x: Tensor,
                          mask: 'xnmt.batchers.Mask' = None) -> Tensor:
    """Sum loss values over non-batch dimensions, zeroing masked entries first.

    Args:
      x: Batched sequence of losses.
      mask: An optional mask for outputs of unequal lengths (1 = masked out).

    Returns:
      Batched sequence of losses, with masked ones zeroed out.
    """
    if xnmt.backend_dynet:
        if mask:
            keep = dy.inputTensor(1.0 - mask.np_arr.T, batched=True)
            x = dy.cmult(x, keep)
        return dy.sum_elems(x)
    # torch backend
    if mask:
        keep = torch.as_tensor(1.0 - mask.np_arr,
                               dtype=x.dtype,
                               device=xnmt.device)
        x = torch.mul(x, keep)
    # Sum over every dimension except the leading batch dimension.
    non_batch_dims = tuple(range(1, len(x.size())))
    return torch.sum(x, dim=non_batch_dims)
Exemple #7
0
  def attention_entropy(self, a):
    """Negated total entropy of a sequence of attention distributions.

    EPSILON is added to each distribution before the log for numerical safety.
    """
    terms = [dy.cmult(a_i + EPSILON, dy.log(a_i + EPSILON)) for a_i in a]
    return -dy.sum_elems(dy.esum(terms))
def test_item(model, sentence):
    """Run the attention classifier on one sentence and return its score.

    Stores the score on ``sentence.prediction_result`` and resets the dynet
    graph on success; returns 0 when the sentence yields no encodable tokens.
    """
    seq = [model.wlookup[int(model.w2i.get(tok, 0))]
           for tok in sentence.preprocessed_sentence]
    if len(seq) > 0:
        encoded = encode_sequence(model, seq, model.sentence_rnn)
        global_max = max_pooling(encoded)
        global_min = average_pooling(encoded)
        if len(encoded) > 0:
            # Unnormalized attention energies, one per encoder state.
            energies = []
            for h in encoded:
                projected = (model.attention_w * h) + model.attention_b
                energies.append(
                    dy.exp(dy.sum_elems(dy.cmult(projected, model.att_context))))

            total = dy.esum(energies)
            weights = [dy.cdiv(e, total) for e in energies]
            # Attention-weighted sum of encoder states.
            att_context = dy.esum(
                [dy.cmult(w, h) for w, h in zip(weights, encoded)])
            features = dy.concatenate([att_context, global_max, global_min])
            y_pred = dy.logistic((model.mlp_w * features) + model.mlp_b)
            sentence.prediction_result = y_pred.scalar_value()
            dy.renew_cg()
            return sentence.prediction_result
    return 0
Exemple #9
0
    def word_assoc_score(self, source_idx, target_idx, relation):
        """
        Score the association of two atoms under the given relation.

        NOTE THAT DROPOUT IS BEING APPLIED HERE
        :param source_idx: embedding index of source atom
        :param target_idx: embedding index of target atom
        :param relation: relation type
        :return: score (dynet expression)
        :raises ValueError: on an unrecognized self.mode
        """
        # prepare
        s = self.embeddings[source_idx]
        if self.no_assoc:
            A = dy.const_parameter(self.word_assoc_weights[relation])
        else:
            A = dy.parameter(self.word_assoc_weights[relation])
        # Fix: dy.dropout returns a new expression; the original discarded the
        # result, so dropout was silently never applied.
        A = dy.dropout(A, self.dropout)
        t = self.embeddings[target_idx]

        # compute
        if self.mode == BILINEAR_MODE:
            return dy.transpose(s) * A * t
        elif self.mode == DIAG_RANK1_MODE:
            # A is packed as [diag, B, C]: diagonal plus a rank-1 correction.
            diag_A = dyagonalize(A[0])
            rank1_BC = A[1] * dy.transpose(A[2])
            ABC = diag_A + rank1_BC
            return dy.transpose(s) * ABC * t
        elif self.mode == TRANSLATIONAL_EMBED_MODE:
            return -dy.l2_norm(s - t + A)
        elif self.mode == DISTMULT:
            return dy.sum_elems(dy.cmult(dy.cmult(s, A), t))
        # Previously fell through returning None; fail loudly instead.
        raise ValueError("unknown scoring mode: %s" % self.mode)
Exemple #10
0
  def calc_loss(self, src, trg, loss_calculator):
    """Language-model MLE loss: predict each source token from its prefix.

    Shifts ``src`` by one position to build (input, target) pairs, encodes the
    inputs with the RNN, scores every timestep, and sums the masked
    per-position losses into a single "mle" factor.
    """
    if not batcher.is_batched(src):
      src = batcher.ListBatch([src])

    # Inputs are src[:-1], targets are src[1:]; masks are sliced to match.
    src_inputs = batcher.ListBatch([s[:-1] for s in src], mask=batcher.Mask(src.mask.np_arr[:,:-1]) if src.mask else None)
    src_targets = batcher.ListBatch([s[1:] for s in src], mask=batcher.Mask(src.mask.np_arr[:,1:]) if src.mask else None)

    self.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src_inputs)
    encodings = self.rnn.transduce(embeddings)
    encodings_tensor = encodings.as_tensor()
    ((hidden_dim, seq_len), batch_size) = encodings.dim()
    # Fold the time dimension into the batch so the scorer sees one big batch.
    encoding_reshaped = dy.reshape(encodings_tensor, (hidden_dim,), batch_size=batch_size * seq_len)
    outputs = self.transform(encoding_reshaped)

    ref_action = np.asarray([sent.words for sent in src_targets]).reshape((seq_len * batch_size,))
    loss_expr_perstep = self.scorer.calc_loss(outputs, batcher.mark_as_batch(ref_action))
    # Unfold back to (seq_len,) per batch element.
    loss_expr_perstep = dy.reshape(loss_expr_perstep, (seq_len,), batch_size=batch_size)
    if src_targets.mask:
      # Zero the loss at padded positions (mask value 1 = padded).
      loss_expr_perstep = dy.cmult(loss_expr_perstep, dy.inputTensor(1.0-src_targets.mask.np_arr.T, batched=True))
    loss_expr = dy.sum_elems(loss_expr_perstep)

    model_loss = loss.FactoredLossExpr()
    model_loss.add_loss("mle", loss_expr)

    return model_loss
Exemple #11
0
    def compute_entropy(self, distribution):
        """ Gets the entropy of a probability distribution that may contain zeroes.

        Inputs:
            distribution (dy.Expression): The probability distribution.

        Returns:
            dy.Expression representing the entropy, restricted to valid actions.
        """
        num_actions = len(self.output_action_vocabulary) - 1
        num_locations = len(self.output_location_vocabulary) - 1
        num_arguments = len(self.output_argument_vocabulary) - 1
        # Binary mask: one for every valid (action, location, argument) index.
        mask_np = numpy.zeros(num_actions * num_locations * num_arguments)
        mask_np[self._valid_action_indices] = 1.
        valid_mask = dy.inputTensor(mask_np)

        # Valid entries keep their probability; invalid entries become zero...
        valid_probs = dy.cmult(valid_mask, distribution)
        # ...and are then lifted to exactly 1, so that log(1) = 0 removes them
        # from the entropy sum.
        invalid_probs = 1.-valid_mask
        probs = valid_probs + invalid_probs

        # p*log(p) with a tiny epsilon to guard log(0); invalid entries
        # contribute exactly zero.
        entropies = dy.cmult(probs, dy.log(probs + 0.00000000001))
        return -dy.sum_elems(entropies)
Exemple #12
0
    def calc_nll(self, src: Union[batchers.Batch, sent.Sentence], trg: Union[batchers.Batch, sent.Sentence]) \
            -> dy.Expression:
        """Per-position negative log-likelihood of ``trg`` given encoded ``src``.

        Requires batched inputs; targets may be cut/padded to the encoder's
        sequence length when ``self.auto_cut_pad`` is set. Returns the masked
        sum over all positions as a single batched expression.
        """
        assert batchers.is_batched(src) and batchers.is_batched(trg)
        batch_size, encodings, outputs, seq_len = self._encode_src(src)

        if trg.sent_len() != seq_len:
            if self.auto_cut_pad:
                trg = self._cut_or_pad_targets(seq_len, trg)
            else:
                raise ValueError(
                    f"src/trg length do not match: {seq_len} != {len(trg[0])}")

        # Flatten (batch, time) into one long batch for the scorer.
        ref_action = np.asarray([trg_sent.words for trg_sent in trg]).reshape(
            (seq_len * batch_size, ))
        loss_expr_perstep = self.scorer.calc_loss(
            outputs, batchers.mark_as_batch(ref_action))
        # loss_expr_perstep = dy.pickneglogsoftmax_batch(outputs, ref_action)
        # Unfold back to one loss per timestep per batch element.
        loss_expr_perstep = dy.reshape(loss_expr_perstep, (seq_len, ),
                                       batch_size=batch_size)
        if trg.mask:
            # Zero out padded positions (mask value 1 = padded).
            loss_expr_perstep = dy.cmult(
                loss_expr_perstep,
                dy.inputTensor(1.0 - trg.mask.np_arr.T, batched=True))
        loss_expr = dy.sum_elems(loss_expr_perstep)

        return loss_expr
Exemple #13
0
def calc_loss(sents):
    """Batch hinge loss over src/trg sentence-representation dot products.

    The diagonal of the similarity matrix holds the correct (paired) scores.
    """
    dy.renew_cg()

    # Fresh recurrent states for both directions of both languages.
    src_fwd = LSTM_SRC_FWD.initial_state()
    src_bwd = LSTM_SRC_BWD.initial_state()
    trg_fwd = LSTM_TRG_FWD.initial_state()
    trg_bwd = LSTM_TRG_BWD.initial_state()

    # Encoding
    src_side = [pair[0] for pair in sents]
    trg_side = [pair[1] for pair in sents]
    src_reps = encode_sents(LOOKUP_SRC, src_fwd, src_bwd, src_side)
    trg_reps = encode_sents(LOOKUP_TRG, trg_fwd, trg_bwd, trg_side)

    # Stack representations column-wise and take all pairwise dot products.
    sim_mtx = dy.transpose(dy.concatenate_cols(src_reps)) * dy.concatenate_cols(trg_reps)

    # Hinge loss along dimension 1 with the diagonal as gold indices.
    hinge = dy.hinge_dim(sim_mtx, list(range(len(sents))), d=1)
    return dy.sum_elems(hinge)
def cross_entropy_loss(y, yhat):
    """
    Compute the cross entropy loss, summed over the current minibatch.

    NOTE(review): the original docstring described a tensorflow exercise
    (tf.to_float / tf.reduce_sum / tf.log), but the implementation below is
    written against dynet: it computes -sum(y * log(yhat)) over all elements.

    Args:
        y:    one-hot encoded targets with shape (n_samples, n_classes).
        yhat: predicted distributions with shape (n_samples, n_classes);
                    each row should sum to 1.
    Returns:
        out:  a scalar dynet expression holding the summed cross entropy.
    """

    ### YOUR CODE HERE
    l_yhat = dy.log(yhat)
    product = dy.cmult(y, l_yhat)
    out = (-dy.sum_elems(product))
    ### END YOUR CODE

    return out
Exemple #15
0
    def calc_loss(self, src, trg, loss_calculator):
        """Masked MLE loss of ``trg`` given encoded ``src``, as a FactoredLossExpr.

        Targets may be cut/padded to the encoder's sequence length when
        ``self.auto_cut_pad`` is set; otherwise a length mismatch raises.
        """
        assert batcher.is_batched(src) and batcher.is_batched(trg)
        batch_size, encodings, outputs, seq_len = self._encode_src(src)

        if trg.sent_len() != seq_len:
            if self.auto_cut_pad:
                trg = self._cut_or_pad_targets(seq_len, trg)
            else:
                raise ValueError(
                    f"src/trg length do not match: {seq_len} != {len(trg[0])}")

        # Flatten (batch, time) into one long batch for the scorer.
        ref_action = np.asarray([sent.words for sent in trg]).reshape(
            (seq_len * batch_size, ))
        loss_expr_perstep = self.scorer.calc_loss(
            outputs, batcher.mark_as_batch(ref_action))
        # loss_expr_perstep = dy.pickneglogsoftmax_batch(outputs, ref_action)
        # Unfold back to one loss per timestep per batch element.
        loss_expr_perstep = dy.reshape(loss_expr_perstep, (seq_len, ),
                                       batch_size=batch_size)
        if trg.mask:
            # Zero out padded positions (mask value 1 = padded).
            loss_expr_perstep = dy.cmult(
                loss_expr_perstep,
                dy.inputTensor(1.0 - trg.mask.np_arr.T, batched=True))
        loss_expr = dy.sum_elems(loss_expr_perstep)

        model_loss = loss.FactoredLossExpr()
        model_loss.add_loss("mle", loss_expr)

        return model_loss
Exemple #16
0
 def l2_normalize(vector):
     """Divide *vector* by its L2 norm, clamped at machine epsilon for stability."""
     norm = dy.sqrt(
         dy.bmax(
             dy.sum_elems(dy.square(vector)),
             np.finfo(float).eps * dy.ones((1))[0],
         ))
     return dy.cdiv(vector, norm)
Exemple #17
0
 def word_assoc_score(self, source_idx, target_idx, relation):
     """
     Score the association of two atoms under the given relation.

     NOTE THAT DROPOUT IS BEING APPLIED HERE
     :param source_idx: embedding index of source atom
     :param target_idx: embedding index of target atom
     :param relation: relation type
     :return: score (dynet expression)
     :raises ValueError: on an unrecognized self.mode
     """
     # prepare
     s = self.embeddings[source_idx]
     if self.no_assoc:
         A = dy.const_parameter(self.word_assoc_weights[relation])
     else:
         A = dy.parameter(self.word_assoc_weights[relation])
     # Fix: dy.dropout returns a new expression; the original discarded the
     # result, so dropout was silently never applied.
     A = dy.dropout(A, self.dropout)
     t = self.embeddings[target_idx]

     # compute
     if self.mode == BILINEAR_MODE:
         return dy.transpose(s) * A * t
     elif self.mode == DIAG_RANK1_MODE:
         # A is packed as [diag, B, C]: diagonal plus a rank-1 correction.
         diag_A = dyagonalize(A[0])
         rank1_BC = A[1] * dy.transpose(A[2])
         ABC = diag_A + rank1_BC
         return dy.transpose(s) * ABC * t
     elif self.mode == TRANSLATIONAL_EMBED_MODE:
         return -dy.l2_norm(s - t + A)
     elif self.mode == DISTMULT:
         return dy.sum_elems(dy.cmult(dy.cmult(s, A), t))
     # Previously fell through returning None; fail loudly instead.
     raise ValueError("unknown scoring mode: %s" % self.mode)
Exemple #18
0
  def calc_loss(self, src, db_idx, src_mask=None, trg_mask=None):
    """Retrieval hinge loss between encoded sources and database targets.

    Encodes ``src`` and the database entries at ``db_idx``, takes all pairwise
    dot products, and computes a hinge loss where the diagonal holds the
    correct pairs. Direction is controlled by ``self.loss_direction``
    ("forward" or "bidirectional").
    """
    src_embeddings = self.src_embedder.embed_sent(src, mask=src_mask)
    self.src_encoder.set_input(src)
    src_encodings = self.exprseq_pooling(self.src_encoder.transduce(src_embeddings))
    trg_batch, trg_mask = self.database[db_idx]
    # print("trg_mask=\n",trg_mask)
    trg_encodings = self.encode_trg_example(trg_batch, mask=trg_mask)
    # Collapse the batch dimension into columns of a single matrix.
    dim = trg_encodings.dim()
    trg_reshaped = dy.reshape(trg_encodings, (dim[0][0], dim[1]))
    # ### DEBUG
    # trg_npv = trg_reshaped.npvalue()
    # for i in range(dim[1]):
    #   print("--- trg_reshaped {}: {}".format(i,list(trg_npv[:,i])))
    # ### DEBUG
    # Pairwise similarity of every source against every target.
    prod = dy.transpose(src_encodings) * trg_reshaped
    # ### DEBUG
    # prod_npv = prod.npvalue()
    # for i in range(dim[1]):
    #   print("--- prod {}: {}".format(i,list(prod_npv[0].transpose()[i])))
    # ### DEBUG
    id_range = list(range(len(db_idx)))
    # This is ugly:
    if self.loss_direction == "forward":
      prod = dy.transpose(prod)
      loss = dy.sum_batches(dy.hinge_batch(prod, id_range))
    elif self.loss_direction == "bidirectional":
      prod = dy.reshape(prod, (len(db_idx), len(db_idx)))
      # Hinge along both dimensions: src->trg and trg->src.
      loss = dy.sum_elems(
        dy.hinge_dim(prod, id_range, d=0) + dy.hinge_dim(prod, id_range, d=1))
    else:
      raise RuntimeError("Illegal loss direction {}".format(self.loss_direction))

    return loss
 def copy_src_probs_pick(token_type, token_literal):
     """Total copy-probability mass at the src positions where this literal occurred.

     Returns a zero scalar when the token type has no copy attention or the
     literal was never seen.
     """
     if token_type not in copy_atts:
         return dy.scalarInput(0.0)
     positions = copy_history[token_type][token_literal]
     if len(positions) == 0:
         return dy.scalarInput(0.0)
     probs = copy_src_probs(token_type)
     return dy.sum_elems(dy.select_rows(probs, positions))
Exemple #20
0
def calc_loss(sent):
    """Variational seq2seq loss for one (src, trg) pair.

    Encodes src with an LSTM, derives a Gaussian posterior (mu, log_var),
    samples a latent z via the reparameterization trick, and decodes trg from
    z. Returns (kl_loss, softmax_loss) as separate dynet expressions.
    """
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src = sent[0]
    trg = sent[1]

    # initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    # get the output of the first LSTM
    src_output = init_state_src.add_inputs([LOOKUP_SRC[x]
                                            for x in src])[-1].output()

    # Now compute mean and standard deviation of source hidden state.
    W_mean = dy.parameter(W_mean_p)
    V_mean = dy.parameter(V_mean_p)
    b_mean = dy.parameter(b_mean_p)

    W_var = dy.parameter(W_var_p)
    V_var = dy.parameter(V_var_p)
    b_var = dy.parameter(b_var_p)

    # The mean vector from the encoder.
    mu = mlp(src_output, W_mean, V_mean, b_mean)
    # This is the diagonal vector of the log co-variance matrix from the encoder
    # (regard this as log variance is easier for furture implementation)
    log_var = mlp(src_output, W_var, V_var, b_var)

    # Compute KL[N(u(x), sigma(x)) || N(0, I)]
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_loss = -0.5 * dy.sum_elems(1 + log_var -
                                  dy.pow(mu, dy.inputVector([2])) -
                                  dy.exp(log_var))

    # Sample z = mu + sigma * eps (reparameterization trick).
    z = reparameterize(mu, log_var)

    # now step through the output sentence
    all_losses = []

    # Decoder is seeded with the latent sample z.
    current_state = LSTM_TRG_BUILDER.initial_state().set_s([z, dy.tanh(z)])
    prev_word = trg[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for next_word in trg[1:]:
        # feed the current state into the
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()

        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        all_losses.append(dy.pickneglogsoftmax(s, next_word))

        prev_word = next_word

    softmax_loss = dy.esum(all_losses)

    return kl_loss, softmax_loss
 def copy_src_probs_map(token_type, lazy=False):
     """Map each previously-copied literal to its summed copy probability.

     With ``lazy=True`` values remain dynet expressions; otherwise they are
     evaluated to floats. Returns {} when nothing was ever copied for this
     token type.
     """
     if token_type not in copy_atts:
         return {}
     literal_history = copy_history[token_type]
     if all(len(h) == 0 for h in literal_history.values()):
         return {}
     probs = copy_src_probs(token_type)
     result = {}
     for literal, history in literal_history.items():
         if len(history) == 0:
             continue
         summed = dy.sum_elems(dy.select_rows(probs, history))
         result[literal] = summed if lazy else summed.value()
     return result
Exemple #22
0
    def cosine_proximity(self, pred, gold):
        """Negative cosine similarity between prediction and gold vectors.

        Fix: the original bound ``pred`` to ``y_true`` and ``gold`` to
        ``y_pred`` — labels swapped. The returned value is unchanged (the
        element-wise product is symmetric), but the names are corrected.
        """
        def l2_normalize(x):
            # Clamp the norm at machine epsilon to avoid division by zero.
            square_sum = dynet.sqrt(dynet.bmax(dynet.sum_elems(dynet.square(x)), np.finfo(float).eps * dynet.ones((1))[0]))
            return dynet.cdiv(x, square_sum)

        y_pred = l2_normalize(pred)
        y_true = l2_normalize(gold)

        return -dynet.sum_elems(dynet.cmult(y_true, y_pred))
Exemple #23
0
 def log_sum_exp(scores):
     """Numerically stable log-sum-exp over a score vector."""
     # Shift by the max score so exp() cannot overflow.
     values = scores.npvalue()
     best = np.argmax(values)
     max_expr = dy.pick(scores, best)
     shifted = scores - dy.concatenate([max_expr] * self.dim_output)
     return max_expr + dy.log(dy.sum_elems(dy.transpose(dy.exp(shifted))))
Exemple #24
0
def loss_function(recon_x, x, mu, logvar):
    """VAE objective: reconstruction BCE plus KL divergence to a unit Gaussian."""
    # Summed binary cross-entropy (equiv. to torch's binary_cross_entropy
    # with size_average=False).
    bce = dy.binary_log_loss(recon_x, x)
    # Kingma & Welling (2014), Appendix B:
    # KLD = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kld = -0.5 * dy.sum_elems(
        1 + logvar - dy.pow(mu, dy.scalarInput(2)) - dy.exp(logvar))
    return bce + kld
 def calc_loss(self, policy):
     """Weighted negative-entropy regularizer over per-step log-probabilities.

     Returns None when the weight is effectively zero so callers can skip it.

     Fix: the original applied ``dy.sum_batches`` twice per step (once inside
     the loop expression and again on its result); the second application was
     redundant since the batch dimension was already reduced.
     """
     if self.weight < 1e-8:
         return None
     neg_entropy = []
     for i, ll in enumerate(policy):
         if self.valid_pos is not None:
             # Restrict to batch elements that are valid at step i.
             ll = dy.pick_batch_elems(ll, self.valid_pos[i])
         # sum over batch and vocab of p * log p (negative entropy).
         neg_entropy.append(dy.sum_batches(dy.sum_elems(dy.cmult(dy.exp(ll), ll))))
     return self.weight * dy.esum(neg_entropy)
Exemple #26
0
def calculate_confidence(vec, proportions=0.5):
    """Confidence value alpha computed from the Gini index of *vec*.

    :param vec: dynet expression holding a probability vector
    :param proportions: scaling factor applied to the Gini index
    :return: (1 - sum(vec_i^2)) * proportions
    :raises Exception: when the squared sum falls outside [0, 1]
    """
    square_sum = dy.sum_elems(dy.cmult(vec, vec)).value()
    if not 0 <= square_sum <= 1:
        raise Exception("Invalid square sum %.3lf" % square_sum)
    gini_index = 1 - square_sum
    return gini_index * proportions
Exemple #27
0
 def cal_context(self, s, selected=None):
     """Attention context over encoder states, optionally restricted to *selected* rows.

     Returns a (context, weights) pair; with no selection the full weight
     vector is applied to self.es_matrix.
     """
     ws = self.cal_scores(s)
     if selected is None:
         return self.es_matrix * ws, ws
     # Renormalize the selected attention weights so they sum to one.
     selected_ws = dy.select_rows(ws, selected)
     selected_ws = dy.cdiv(selected_ws,
                           dy.sum_elems(selected_ws))
     # NOTE(review): `es` is not defined in this scope — presumably the list
     # of encoder-state columns backing self.es_matrix; confirm against the
     # enclosing class, as written this would raise NameError.
     return dy.concatenate_cols(
         [es[index]
          for index in selected]) * selected_ws, ws
Exemple #28
0
def norm_vec(vec):
    """
    normalize a dynet vector expression so its elements sum to 1
    :param vec: dynet vector expression
    :return: normalized dynet expression
    """
    total = dy.sum_elems(vec)
    # Fix: removed a leftover debug print of the normalized values.
    return vec / total.value()
Exemple #29
0
def calc_loss(sent):
    """Variational seq2seq loss for one (src, trg) pair.

    Encodes src with an LSTM, derives a Gaussian posterior (mu, log_var),
    samples a latent z via the reparameterization trick, and decodes trg from
    z. Returns (kl_loss, softmax_loss) as separate dynet expressions.
    """
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src = sent[0]
    trg = sent[1]

    # initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    # get the output of the first LSTM
    src_output = init_state_src.add_inputs([LOOKUP_SRC[x] for x in src])[-1].output()

    # Now compute mean and standard deviation of source hidden state.
    W_mean = dy.parameter(W_mean_p)
    V_mean = dy.parameter(V_mean_p)
    b_mean = dy.parameter(b_mean_p)

    W_var = dy.parameter(W_var_p)
    V_var = dy.parameter(V_var_p)
    b_var = dy.parameter(b_var_p)

    # The mean vector from the encoder.
    mu = mlp(src_output, W_mean, V_mean, b_mean)
    # This is the diagonal vector of the log co-variance matrix from the encoder
    # (regard this as log variance is easier for furture implementation)
    log_var = mlp(src_output, W_var, V_var, b_var)

    # Compute KL[N(u(x), sigma(x)) || N(0, I)]
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_loss = -0.5 * dy.sum_elems(1 + log_var - dy.pow(mu, dy.inputVector([2])) - dy.exp(log_var))

    # Sample z = mu + sigma * eps (reparameterization trick).
    z = reparameterize(mu, log_var)

    # now step through the output sentence
    all_losses = []

    # Decoder is seeded with the latent sample z.
    current_state = LSTM_TRG_BUILDER.initial_state().set_s([z, dy.tanh(z)])
    prev_word = trg[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for next_word in trg[1:]:
        # feed the current state into the
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()

        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        all_losses.append(dy.pickneglogsoftmax(s, next_word))

        prev_word = next_word

    softmax_loss = dy.esum(all_losses)

    return kl_loss, softmax_loss
Exemple #30
0
    def calc_loss(
        self, model: 'model_base.ConditionedModel',
        src: Union[sent.Sentence, 'batchers.Batch'],
        trg: Union[sent.Sentence,
                   'batchers.Batch']) -> losses.FactoredLossExpr:
        """Minimum-risk training loss: expected (negated) evaluation score
        under the renormalized, alpha-scaled sample distribution.

        Each search output contributes one sample; duplicate samples can be
        masked out when ``self.unique_sample`` is set.
        """
        batch_size = trg.batch_size()
        # Hashes of samples already seen, per batch element (for unique_sample).
        uniques = [set() for _ in range(batch_size)]
        deltas = []
        probs = []
        sign = -1 if self.inv_eval else 1
        search_outputs = model.generate_search_output(src,
                                                      self.search_strategy)
        for search_output in search_outputs:
            assert len(search_output.word_ids) == 1
            assert search_output.word_ids[0].shape == (len(
                search_output.state), )
            # Sum of per-word log-probs for this sample, scaled by alpha.
            logprob = []
            for word, state in zip(search_output.word_ids[0],
                                   search_output.state):
                lpdist = model.decoder.scorer.calc_log_probs(state.as_vector())
                lp = dy.pick(lpdist, word)
                logprob.append(lp)
            sample = search_output.word_ids
            logprob = dy.esum(logprob) * self.alpha
            # Calculate the evaluation score
            eval_score = np.zeros(batch_size, dtype=float)
            mask = np.zeros(batch_size, dtype=float)
            for j in range(batch_size):
                ref_j = self.remove_eos(trg[j].words)
                hyp_j = self.remove_eos(sample[j].tolist())
                if self.unique_sample:
                    hash_val = hash(tuple(hyp_j))
                    if len(hyp_j) == 0 or hash_val in uniques[j]:
                        # Duplicate/empty sample: exclude from the softmax.
                        mask[j] = -1e20  # represents negative infinity
                        continue
                    else:
                        uniques[j].add(hash_val)
                    # Calc evaluation score
                eval_score[j] = self.evaluation_metric.evaluate_one_sent(
                    ref_j, hyp_j) * sign
            # Appending the delta and logprob of this sample
            prob = logprob + dy.inputTensor(mask, batched=True)
            deltas.append(dy.inputTensor(eval_score, batched=True))
            probs.append(prob)
        # Renormalize over samples and take the expected score.
        sample_prob = dy.softmax(dy.concatenate(probs))
        deltas = dy.concatenate(deltas)
        risk = dy.sum_elems(dy.cmult(sample_prob, deltas))

        ### Debug
        #print(sample_prob.npvalue().transpose()[0])
        #print(deltas.npvalue().transpose()[0])
        #print("----------------------")
        ### End debug

        return losses.FactoredLossExpr({"risk": risk})
Exemple #31
0
    def _perform_calc_loss(
        self, model: 'model_base.ConditionedModel',
        src: Union[sent.Sentence, 'batchers.Batch'],
        trg: Union[sent.Sentence,
                   'batchers.Batch']) -> losses.FactoredLossExpr:
        """REINFORCE-with-baseline loss over sampled search outputs.

        Fix: the original wrapped the already-constructed FactoredLossExpr in
        a second FactoredLossExpr under the key "risk" (a copy-paste from the
        minimum-risk loss); the factored loss is now returned directly, so
        the "polc_loss"/"base_loss" factors keep their names.
        """
        search_outputs = model.generate_search_output(src,
                                                      self.search_strategy)
        sign = -1 if self.inv_eval else 1

        # TODO: Fix units
        total_loss = collections.defaultdict(int)
        for search_output in search_outputs:
            # Calculate rewards: per-sentence metric scores.
            eval_score = []
            for trg_i, sample_i in zip(trg, search_output.word_ids):
                # Removing EOS
                sample_i = utils.remove_eos(sample_i.tolist(), vocabs.Vocab.ES)
                ref_i = trg_i.words[:trg_i.len_unpadded()]
                score = self.evaluation_metric.evaluate_one_sent(
                    ref_i, sample_i)
                eval_score.append(sign * score)
            reward = dy.inputTensor(eval_score, batched=True)
            # Composing losses
            baseline_loss = []
            cur_losses = []
            for state, mask in zip(search_output.state, search_output.mask):
                # Baseline predicts the reward from the detached state.
                bs_score = self.baseline.transform(
                    dy.nobackprop(state.as_vector()))
                baseline_loss.append(dy.squared_distance(reward, bs_score))
                logsoft = model.decoder.scorer.calc_log_probs(
                    state.as_vector())
                loss_i = dy.cmult(logsoft, reward - bs_score)
                # Zero out padded positions.
                cur_losses.append(
                    dy.cmult(loss_i, dy.inputTensor(mask, batched=True)))

            total_loss["polc_loss"] += dy.sum_elems(dy.esum(cur_losses))
            total_loss["base_loss"] += dy.sum_elems(dy.esum(baseline_loss))
        units = [t.len_unpadded() for t in trg]
        return losses.FactoredLossExpr(
            {k: losses.LossExpr(v, units)
             for k, v in total_loss.items()})
Exemple #32
0
 def __call__(self, translator, initial_state, src, trg):
     """REINFORCE loss over the best hypothesis of a search strategy.

     Scores the hypothesis against ``trg`` with ``self.evaluation_metric``
     and composes the policy-gradient loss, optionally with a baseline.
     """
     # TODO(philip30): currently only using the best hypothesis / first sample for reinforce loss
     # A small further implementation is needed if we want to do reinforce with multiple samples.
     search_output = translator.search_strategy.generate_output(
         translator, initial_state)[0]
     # Calculate evaluation scores
     self.eval_score = []
     for trg_i, sample_i in zip(trg, search_output.word_ids):
         # Removing EOS
         sample_i = self.remove_eos(sample_i.tolist())
         ref_i = self.remove_eos(trg_i.words)
         # Evaluating (empty samples score 0; sign flips when inv_eval).
         if len(sample_i) == 0:
             score = 0
         else:
             score = self.evaluation_metric.evaluate(ref_i, sample_i) * \
                     (-1 if self.inv_eval else 1)
         self.eval_score.append(score)
     self.true_score = dy.inputTensor(self.eval_score, batched=True)
     # Composing losses
     loss = LossBuilder()
     if self.use_baseline:
         baseline_loss = []
         losses = []
         for state, logsoft, mask in zip(search_output.state,
                                         search_output.logsoftmaxes,
                                         search_output.mask):
             bs_score = self.baseline(state)
             baseline_loss.append(
                 dy.squared_distance(self.true_score, bs_score))
             loss_i = dy.cmult(logsoft, self.true_score - bs_score)
             # Zero out padded positions.
             losses.append(
                 dy.cmult(loss_i, dy.inputTensor(mask, batched=True)))
         loss.add_loss("reinforce", dy.sum_elems(dy.esum(losses)))
         loss.add_loss("reinf_baseline",
                       dy.sum_elems(dy.esum(baseline_loss)))
     else:
         # NOTE(review): `logsofts` is not defined in this scope — presumably
         # this should be search_output.logsoftmaxes; as written this branch
         # would raise NameError when use_baseline is False. Confirm upstream.
         loss.add_loss(
             "reinforce",
             dy.sum_elems(dy.cmult(self.true_score, dy.esum(logsofts))))
     return loss
Exemple #33
0
    def __call__(self, logsoftmaxes, mask):
        """Entropy regularizer: strength * sum over steps of p*log(p), masked.

        Returns 0 immediately when the strength hyperparameter is zero.
        """
        strength = self.strength.value()
        if strength == 0:
            return 0
        terms = []
        for step, logsoftmax in enumerate(logsoftmaxes):
            # p * log(p): the (negative) entropy contribution of this step.
            term = dy.cmult(dy.exp(logsoftmax), logsoftmax)
            if mask is not None:
                term = dy.cmult(dy.inputTensor(mask[step], batched=True), term)
            terms.append(term)

        return strength * dy.sum_elems(dy.esum(terms))
Exemple #34
0
def loss_function(recon_x, x, mu, logvar):
    """VAE objective: binary reconstruction loss plus KL divergence.

    The KL term follows Appendix B of Kingma & Welling,
    "Auto-Encoding Variational Bayes" (ICLR 2014,
    https://arxiv.org/abs/1312.6114):
        -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    """
    # Equivalent to torch.nn.functional.binary_cross_entropy(..., size_average=False).
    reconstruction = dy.binary_log_loss(recon_x, x)

    mu_squared = dy.pow(mu, dy.scalarInput(2))
    kl_divergence = -0.5 * dy.sum_elems(1 + logvar - mu_squared - dy.exp(logvar))

    return reconstruction + kl_divergence
Exemple #35
0
    def test_layer_norm(self):
        """layer_norm(x, g, b) must equal g / std(x) * (x - mean(x)) + b.

        Also runs backward() through a scalar sum to verify that gradients
        can flow through the layer_norm node.
        """
        dy.renew_cg()
        x = dy.inputTensor(self.v1)
        g = dy.inputTensor(self.v2)
        b = dy.inputTensor(self.v3)
        y = dy.layer_norm(x, g, b)
        l = dy.sum_elems(y)
        # backward() performs the forward pass itself, so the previously
        # stored (and never used) l.scalar_value() call was dropped.
        l.backward()

        y_np_value = self.v2 / self.v1.std() * (self.v1 - self.v1.mean()) + self.v3

        self.assertTrue(np.allclose(y.npvalue(), y_np_value))
Exemple #36
0
    def test_layer_norm(self):
        """Check dy.layer_norm against a NumPy reference implementation."""
        dy.renew_cg()
        inp = dy.inputTensor(self.v1)
        gain = dy.inputTensor(self.v2)
        bias = dy.inputTensor(self.v3)
        normed = dy.layer_norm(inp, gain, bias)

        # Backprop through the scalar sum to exercise the gradient path.
        total = dy.sum_elems(normed)

        total.backward()

        # Reference: gain / std(x) * (x - mean(x)) + bias
        shifted = self.v1 - self.v1.mean()
        expected = self.v2 / self.v1.std() * shifted + self.v3

        self.assertTrue(np.allclose(normed.npvalue(), expected))
def calc_loss(sents):
    """Batch hinge loss for matching parallel source/target sentences.

    Encodes both sides of each pair with bidirectional LSTMs, forms a
    dot-product similarity matrix between all source and target vectors,
    and applies a hinge loss so that each sentence's true translation
    (the diagonal entry) scores higher than the others.
    """
    dy.renew_cg()

    # Fresh recurrent states for the two bidirectional encoders.
    fwd_src = LSTM_SRC_FWD.initial_state()
    bwd_src = LSTM_SRC_BWD.initial_state()
    fwd_trg = LSTM_TRG_FWD.initial_state()
    bwd_trg = LSTM_TRG_BWD.initial_state()

    src_side = [pair[0] for pair in sents]
    trg_side = [pair[1] for pair in sents]

    # Encode each side into one vector per sentence.
    src_vecs = encode_sents(LOOKUP_SRC, fwd_src, bwd_src, src_side)
    trg_vecs = encode_sents(LOOKUP_TRG, fwd_trg, bwd_trg, trg_side)

    # Stack representations column-wise and take dot products: entry (i, j)
    # is the similarity between source i and target j.
    similarity = dy.transpose(dy.concatenate_cols(src_vecs)) * dy.concatenate_cols(trg_vecs)

    # Hinge loss along dimension 1 with the diagonal as the correct index.
    hinge = dy.hinge_dim(similarity, list(range(len(sents))), d=1)

    return dy.sum_elems(hinge)