Esempio n. 1
0
  def on_calc_additional_loss(self, translator_loss):
    """Build the auxiliary segmentation losses from the translator's loss.

    The reward is the negated ``"mle"`` entry of ``translator_loss``
    (exponentiated unless ``self.log_reward`` is set) and is detached from the
    graph with ``dy.nobackprop``. Depending on the object's configuration the
    result collects a "Baseline", a "Reinforce" and a "Confidence Penalty" term.

    Args:
      translator_loss: mapping that provides the translation loss under ``"mle"``.

    Returns:
      ``None`` when segmentation is not being learned or no segment decisions
      were recorded; otherwise the populated ``LossBuilder``.
    """
    if not self.learn_segmentation or self.segment_decisions is None:
      return None
    reward = -translator_loss["mle"]
    if not self.log_reward:
      reward = dy.exp(reward)
    reward = dy.nobackprop(reward)

    # The reward must carry one value per batch item rather than a single scalar.
    assert reward.dim()[1] == len(self.src_sent)
    # Mask
    enc_mask = self.enc_mask.get_active_one_mask().transpose() if self.enc_mask is not None else None
    # Compose the loss
    ret = LossBuilder()
    ## Length prior
    alpha = self.length_prior_alpha.value() if self.length_prior_alpha is not None else 0
    if alpha > 0:
      reward += self.segment_length_prior * alpha
    # reward z-score normalization
    if self.z_normalization:
      reward = dy.cdiv(reward-dy.mean_batches(reward), dy.std_batches(reward) + EPS)
    ## Baseline Loss
    if self.use_baseline:
      baseline_loss = []
      for i, baseline in enumerate(self.bs):
        loss = dy.squared_distance(reward, baseline)
        if enc_mask is not None:
          loss = dy.cmult(dy.inputTensor(enc_mask[i], batched=True), loss)
        baseline_loss.append(loss)

      ret.add_loss("Baseline", dy.esum(baseline_loss))

    if self.print_sample:
      # This used to index with the loop variable leaked from the baseline loop,
      # which raised NameError whenever the baseline was disabled. Printing the
      # last step explicitly matches the old output when the baseline is on,
      # assuming self.bs and self.segment_logsoftmaxes have the same length
      # (the Reinforce loop below indexes both with the same range).
      print(dy.exp(self.segment_logsoftmaxes[-1]).npvalue().transpose()[0])
    ## Reinforce Loss
    lmbd = self.lmbd.value()
    if lmbd > 0.0:
      reinforce_loss = []
      # One term per segmentation step: (reward - baseline) * -log p(decision)
      for i in range(len(self.segment_decisions)):
        ll = dy.pick_batch(self.segment_logsoftmaxes[i], self.segment_decisions[i])
        if self.use_baseline:
          # The baseline is detached so this term does not train it.
          r_i = reward - dy.nobackprop(self.bs[i])
        else:
          r_i = reward
        if enc_mask is not None:
          ll = dy.cmult(dy.inputTensor(enc_mask[i], batched=True), ll)
        reinforce_loss.append(r_i * -ll)
      loss = dy.esum(reinforce_loss) * lmbd
      ret.add_loss("Reinforce", loss)
    if self.confidence_penalty:
      ls_loss = self.confidence_penalty(self.segment_logsoftmaxes, enc_mask)
      ret.add_loss("Confidence Penalty", ls_loss)
    # Total Loss
    return ret
Esempio n. 2
0
def reparameterize(mu, logvar):
    """Return ``mu + std * eps`` where ``eps`` comes from ``dy.random_normal``.

    ``std`` is ``exp(0.5 * logvar)``. The noise vector has as many entries as
    the first dimension of ``mu``.
    """
    latent_size = mu.dim()[0][0]
    noise = dy.random_normal(latent_size)
    sigma = dy.exp(logvar * 0.5)
    scaled_noise = dy.cmult(sigma, noise)
    return mu + scaled_noise
Esempio n. 3
0
 def reparameterize(self, mu, logvar):
     """Sample around ``mu`` while training; return ``mu`` itself otherwise.

     In training mode the result is ``eps * std + mu`` with
     ``std = exp(0.5 * logvar)`` and ``eps`` drawn from a normal distribution
     with mean 0 and standard deviation 1, shaped like ``std``.
     """
     if not self.training:
         return mu
     sigma = dy.exp(logvar * 0.5)
     noise = dy.random_normal(dim=sigma.dim()[0], mean=0.0, stddev=1.0)
     return dy.cmult(noise, sigma) + mu
Esempio n. 4
0
def log_sum_exp(scores, n_tags):
    """Return ``log(sum(exp(scores)))`` using the max-shift trick.

    The largest score is located through a forward pass (``npvalue``), tiled
    ``n_tags`` times, and subtracted before exponentiating; it is added back
    after the logarithm.
    """
    best_index = np.argmax(scores.npvalue())
    peak = dy.pick(scores, best_index)
    tiled_peak = dy.concatenate([peak] * n_tags)
    exp_shifted = dy.exp(scores - tiled_peak)
    return peak + dy.log(dy.sum_cols(dy.transpose(exp_shifted)))
Esempio n. 5
0
def reparameterize(mu, logvar):
    """Reparameterization trick: shift and scale noise to obtain ``z``.

    The noise comes from ``dy.random_normal`` sized by the first dimension of
    ``mu``; the scale is ``exp(0.5 * logvar)``.
    """
    # Draw the noise first, then scale and shift it.
    unit_noise = dy.random_normal(mu.dim()[0][0])
    return mu + dy.cmult(dy.exp(logvar * 0.5), unit_noise)
def test_item(model, sentence):
    """Score one sentence with the attention classifier and record the result.

    Tokens of ``sentence.preprocessed_sentence`` are mapped through
    ``model.w2i`` (index 0 for unknown tokens) and ``model.wlookup``, encoded
    with ``model.sentence_rnn``, attention-pooled, concatenated with the max-
    and average-pooled encodings, and fed to a logistic output layer.

    Returns:
        The scalar prediction, which is also stored on
        ``sentence.prediction_result``; ``0`` when the sentence has no tokens
        or the encoder yields no states. The computation graph is renewed only
        on the successful path.
    """
    embedded = [
        model.wlookup[int(model.w2i.get(token, 0))]
        for token in sentence.preprocessed_sentence
    ]
    if len(embedded) == 0:
        return 0

    states = encode_sequence(model, embedded, model.sentence_rnn)
    pooled_max = max_pooling(states)
    pooled_avg = average_pooling(states)
    if len(states) == 0:
        return 0

    # Attention scores: exp(<W h + b, context>) for every encoder state.
    projections = [(model.attention_w * h) + model.attention_b for h in states]
    unnormalized = [
        dy.exp(dy.sum_elems(dy.cmult(proj, model.att_context)))
        for proj in projections
    ]
    normalizer = dy.esum(unnormalized)
    weights = [dy.cdiv(score, normalizer) for score in unnormalized]

    attended = dy.esum([dy.cmult(w, h) for w, h in zip(weights, states)])
    features = dy.concatenate([attended, pooled_max, pooled_avg])
    prediction = dy.logistic((model.mlp_w * features) + model.mlp_b)
    sentence.prediction_result = prediction.scalar_value()
    dy.renew_cg()
    return sentence.prediction_result
def softmax(x):
    """
    Compute a numerically stable softmax along dimension 1 with DyNet ops.

    The maximum over dimension 1 is subtracted from every column before
    exponentiating, and each entry is then divided by the sum of its row's
    exponentials.

    Args:
        x:   matrix expression; presumably (n_samples, n_features) with one
             sample per row -- confirm against the caller.
    Returns:
        out: expression with the same shape as x.
    """
    row_max = dy.max_dim(x, 1)
    exps = dy.exp(dy.colwise_add(x, -row_max))
    # Broadcast the per-row totals back to the shape of x.
    row_totals = dy.colwise_add(dy.zeroes(x.dim()[0]), dy.sum_cols(exps))
    return dy.cdiv(exps, row_totals)
Esempio n. 8
0
    def span_parser(self, sentence, is_train, elmo_embeddings, cur_word_index, gold=None):
        """Score every span of ``sentence`` and either compute a loss or parse.

        Spans ``(start, end)`` with ``0 <= start < end <= len(sentence)`` are
        encoded from the featurized sentence and turned into label
        log-probabilities.

        Returns:
            When ``is_train`` is true: ``(None, total_loss)`` where
            ``total_loss`` is the summed negative log-probability of the oracle
            label of every span (``gold`` is dereferenced, so it must be given).
            Otherwise: ``(tree, additional_info, label_probabilities)`` with
            the first two coming from ``optimal_parser`` and the last being the
            exponentiated log-probabilities as a numpy value.
        """
        if gold is not None:
            assert isinstance(gold, ParseNode)

        lstm_outputs = self._featurize_sentence(sentence, is_train=is_train,
                                                elmo_embeddings=elmo_embeddings,
                                                cur_word_index=cur_word_index)
        n_words = len(sentence)
        spans = [(left, right)
                 for left in range(n_words)
                 for right in range(left + 1, n_words + 1)]
        span_to_index = {span: position for position, span in enumerate(spans)}
        encodings = [self._get_span_encoding(left, right, lstm_outputs)
                     for left, right in spans]
        label_log_probabilities = self._encodings_to_label_log_probabilities(encodings)

        total_loss = dy.zeros(1)
        if not is_train:
            scores_np = label_log_probabilities.npvalue()
            tree, additional_info = optimal_parser(scores_np,
                                                   span_to_index,
                                                   sentence,
                                                   self.empty_label_index,
                                                   self.label_vocab,
                                                   gold)
            return tree, additional_info, dy.exp(label_log_probabilities).npvalue()

        for left, right in spans:
            gold_label_index = self.label_vocab.index(gold.oracle_label(left, right))
            column = span_to_index[(left, right)]
            total_loss -= label_log_probabilities[gold_label_index][column]
        return None, total_loss
Esempio n. 9
0
    def train(self, mini_batch, num_train, k):
        """Run one contrastive update over the cross-language pairs of a batch.

        Only pairs ``(i, j)`` with different languages and at least one sign
        equal to 1 contribute. Each contributes ``log(exp(lu) + kq) - nom``
        where ``lu`` is the negative squared distance of the two items,
        ``kq = k / num_train``, and ``nom`` is ``lu`` when both signs agree and
        ``log(kq)`` otherwise.

        Returns:
            The mean pair loss value, or ``0`` when no pair qualified (no
            update is performed in that case). The graph is renewed either way.
        """
        _, _, _, langs, signs, _ = mini_batch
        # Last layer of the network output, reshaped so items can be indexed.
        hidden = self.rnn_mlp(mini_batch, True)[-1]
        by_feature = dy.reshape(hidden, (hidden.dim()[0][0], hidden.dim()[1]))
        by_item = dy.transpose(by_feature)

        # Noise constant of the objective and its logarithm.
        kq = dy.scalarInput(float(k) / num_train)
        log_kq = dy.log(kq)

        pair_losses = []
        n_items = len(langs)
        for i in range(n_items):
            for j in range(i + 1, n_items):
                if langs[i] == langs[j]:
                    continue
                if signs[i] != 1 and signs[j] != 1:
                    continue
                neg_distance = -dy.squared_distance(by_item[i], by_item[j])
                denominator = dy.log(dy.exp(neg_distance) + kq)
                numerator = neg_distance if signs[i] == signs[j] else log_kq
                pair_losses.append(denominator - numerator)

        err_value = 0
        if pair_losses:
            err = dy.esum(pair_losses) / len(pair_losses)
            err.forward()
            err_value = err.value()
            err.backward()
            self.trainer.update()
        dy.renew_cg()
        return err_value
Esempio n. 10
0
    def _policy_shape_probs(self,
                            prob_dist):
        """Reweight ``prob_dist`` with per-action shaping terms and renormalize.

        A shaping vector shaped like ``prob_dist`` is accumulated over the
        entries of ``self.output_action_vocabulary``, multiplied element-wise
        with ``prob_dist``, and passed through a softmax (restricted to
        ``self._valid_action_indices`` when ``self.args.syntax_restricted`` is
        set).

        Args:
            prob_dist: expression indexed by ``self._all_output_vocabulary``
                indices; presumably a column vector of length
                ``num_actions * num_locations * num_arguments`` -- TODO confirm.

        Returns:
            The renormalized expression.
        """
        # TODO: this is specific to Alchemy
        # Each vocabulary size is reduced by one; presumably this excludes a
        # special entry such as EOS / NO_ARG -- confirm.
        num_actions = len(self.output_action_vocabulary) - 1
        num_locations = len(self.output_location_vocabulary) - 1
        num_arguments = len(self.output_argument_vocabulary) - 1
        new_probdist = dy.zeros(prob_dist.dim()[0])
        zeroes = numpy.zeros(num_locations * num_arguments)
        ones = numpy.ones(num_locations * num_arguments)
        eos_prob = prob_dist[self._all_output_vocabulary.lookup_index((EOS, NO_ARG, NO_ARG))]
        # NOTE(review): action_idx is never incremented in this block, so every
        # iteration builds the same mask (ones for the first action's slice,
        # zeros elsewhere). Confirm whether an `action_idx += 1` is missing.
        action_idx = 0
        for action in self.output_action_vocabulary:
            masks = numpy.concatenate(
                        (numpy.repeat(zeroes, action_idx),
                         ones,
                         numpy.repeat(zeroes, num_actions - action_idx - 1)))
            actions_masks = dy.reshape(dy.inputTensor(masks),
                                       (num_actions * num_locations * num_arguments, 1))
            # Actions other than EOS / "push" / "pop" contribute nothing.
            if action == EOS:
                new_probdist += dy.cmult(actions_masks, prob_dist) / 2.
            elif action == "push":
                # NOTE(review): 56 is an unexplained constant -- confirm its
                # meaning before reusing this outside Alchemy.
                new_probdist += dy.cmult(actions_masks, prob_dist) + eos_prob / (2. * 56.)
            elif action == "pop":
                new_probdist += dy.cmult(actions_masks, prob_dist)

        if self.args.syntax_restricted:
            return dy.exp(dy.log_softmax(dy.cmult(new_probdist, prob_dist),
                                         restrict = self._valid_action_indices))
        else:
            return dy.softmax(dy.cmult(new_probdist, prob_dist))
Esempio n. 11
0
    def intra_sent_attend(self, vecs):
        """Apply intra-sentence attention with a distance-dependent bias.

        Every vector is projected through ``tanh(SelIntraFW * v)``. The
        unnormalized weight between positions ``i`` and ``j`` is
        ``exp(<f_i, f_j> + SelIntraBias[i - j + DIST_BIAS_DIM / 2])``. Each
        position receives the weight-normalized sum of all input vectors, and
        the output is ``tanh(SelIntraHW * [v; attended])`` per position.

        Args:
            vecs: list of input vector expressions.

        Returns:
            A list with one output expression per input vector (empty for an
            empty input).
        """
        projected = [dt.tanh(self.SelIntraFW * v) for v in vecs]

        # Unnormalized attention weights, one row per query position.
        expE = [
            [
                dt.exp(
                    dt.dot_product(fq, fc) +
                    self.SelIntraBias[i - j +
                                      int(config.d["DIST_BIAS_DIM"] / 2)])
                for j, fc in enumerate(projected)
            ]
            for i, fq in enumerate(projected)
        ]

        # Direct iteration replaces the Python-2-only xrange index loops.
        attended = []
        for row in expE:
            inv_row_total = dt.pow(dt.esum(row), dt.scalarInput(-1))
            weighted_sum = dt.esum([v * w for v, w in zip(vecs, row)])
            attended.append(weighted_sum * inv_row_total)

        return [
            dt.tanh(self.SelIntraHW * dt.concatenate([v, a]))
            for v, a in zip(vecs, attended)
        ]
Esempio n. 12
0
 def reparameterize(self, mu, logvar):
     """Return a noisy sample around ``mu`` in training mode, else ``mu``.

     The training-mode sample is ``eps * exp(0.5 * logvar) + mu`` with ``eps``
     drawn by ``dy.random_normal`` (mean 0, stddev 1) in the shape of the
     standard deviation.
     """
     z = mu
     if self.training:
         scale = dy.exp(logvar * 0.5)
         unit_noise = dy.random_normal(dim=scale.dim()[0], mean=0.0, stddev=1.0)
         z = dy.cmult(unit_noise, scale) + mu
     return z
Esempio n. 13
0
def calc_loss(sent):
    """Compute the two loss terms of the sequence VAE for one sentence pair.

    Args:
        sent: indexable pair ``(src, trg)`` of word-index sequences.

    Returns:
        ``(kl_loss, softmax_loss)``: the KL term between the encoder's
        diagonal Gaussian and a standard normal, and the summed negative
        log-likelihood of ``trg[1:]`` under the decoder.
    """
    dy.renew_cg()
    src, trg = sent[0], sent[1]

    # Encode the source and keep the output of the final LSTM step.
    encoder_states = LSTM_SRC_BUILDER.initial_state().add_inputs(
        [LOOKUP_SRC[word] for word in src])
    src_output = encoder_states[-1].output()

    # Mean and log-variance heads over the source encoding.
    mu = mlp(src_output,
             dy.parameter(W_mean_p),
             dy.parameter(V_mean_p),
             dy.parameter(b_mean_p))
    log_var = mlp(src_output,
                  dy.parameter(W_var_p),
                  dy.parameter(V_var_p),
                  dy.parameter(b_var_p))

    # KL[N(mu, sigma) || N(0, I)] = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    mu_squared = dy.pow(mu, dy.inputVector([2]))
    variance = dy.exp(log_var)
    kl_loss = -0.5 * dy.sum_elems(1 + log_var - mu_squared - variance)

    z = reparameterize(mu, log_var)

    # Decode: the sampled z seeds the target LSTM state.
    state = LSTM_TRG_BUILDER.initial_state().set_s([z, dy.tanh(z)])
    prev_word = trg[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    step_losses = []
    for next_word in trg[1:]:
        state = state.add_input(LOOKUP_TRG[prev_word])
        logits = dy.affine_transform([b_sm, W_sm, state.output()])
        step_losses.append(dy.pickneglogsoftmax(logits, next_word))
        prev_word = next_word

    return kl_loss, dy.esum(step_losses)
Esempio n. 14
0
    def decomp_attend(self, vecsA, vecsB):
        """Score two vector sequences with attend / compare / aggregate steps.

        Attend: ``expE[i][j] = exp(<a_i, b_j>)``. ``beta[i]`` is the summary of
        ``vecsB`` for ``a_i`` (weights normalized over ``j``); ``alpha[j]`` is
        the summary of ``vecsA`` for ``b_j`` (weights normalized over ``i``).
        Compare: ``logistic([a_i; beta_i])`` and ``logistic([b_j; alpha_j])``.
        Aggregate: both sets are summed and the result is their dot product.

        The raw input vectors are used where the model would normally apply
        learned transformations.

        Args:
            vecsA: list of vector expressions for the first sequence.
            vecsB: list of vector expressions for the second sequence; its
                vectors need the same dimension as those of ``vecsA`` because
                they are combined with ``dot_product``.

        Returns:
            A scalar expression: the dot product of the aggregated sides.
        """
        Fq, Fc = vecsA, vecsB

        # Attend: unnormalized weights, rows follow vecsA, columns follow vecsB.
        expE = [[dt.exp(dt.dot_product(fq, fc)) for fc in Fc] for fq in Fq]

        beta = []
        for row in expE:
            inv_row_total = dt.pow(dt.esum(row), dt.scalarInput(-1))
            summary = dt.esum([fc * w for fc, w in zip(Fc, row)])
            beta.append(summary * inv_row_total)

        alpha = []
        for j in range(len(Fc)):
            column = [row[j] for row in expE]
            inv_col_total = dt.pow(dt.esum(column), dt.scalarInput(-1))
            # The summary must range over vecsA. Summing Fc[j] here, as the
            # code previously did, cancels against the normalizer and makes
            # alpha[j] equal to Fc[j].
            summary = dt.esum([fq * w for fq, w in zip(Fq, column)])
            alpha.append(summary * inv_col_total)

        # Compare
        v1i = [dt.logistic(dt.concatenate([fq, b])) for fq, b in zip(Fq, beta)]
        v2j = [dt.logistic(dt.concatenate([fc, a])) for fc, a in zip(Fc, alpha)]

        # Aggregate
        v1 = dt.esum(v1i)
        v2 = dt.esum(v2j)
        return dt.dot_product(v1, v2)
Esempio n. 15
0
 def log_sum_exp(scores):
     """Return ``log(sum(exp(scores)))``, shifted by the maximum score.

     The maximum is found through a forward pass (``npvalue``) and tiled
     ``self.dim_output`` times so it can be subtracted from ``scores``.
     """
     peak = dy.pick(scores, np.argmax(scores.npvalue()))
     tiled_peak = dy.concatenate([peak] * self.dim_output)
     exp_shifted = dy.exp(scores - tiled_peak)
     return peak + dy.log(dy.sum_elems(dy.transpose(exp_shifted)))
 def log_sum_exp(tag_score_arr):
     """Return ``log(sum(exp(tag_score_arr)))`` using the max-shift trick.

     The largest entry is tiled ``len(self.pos) + 2`` times, subtracted before
     exponentiating, and added back after the logarithm.
     """
     best_index = np.argmax(tag_score_arr.value())
     peak = tag_score_arr[best_index]
     tiled_peak = dynet.concatenate([peak] * (len(self.pos) + 2))
     shifted_total = dynet.sum_dim(dynet.exp(tag_score_arr - tiled_peak), [0])
     return peak + dynet.log(shifted_total)
Esempio n. 17
0
def loss_function(recon_x, x, mu, logvar):
    """Return the VAE objective: binary log loss plus the KL term.

    The KL term follows Appendix B of Kingma and Welling, "Auto-Encoding
    Variational Bayes" (ICLR 2014, https://arxiv.org/abs/1312.6114):
    ``-0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)``.
    """
    reconstruction = dy.binary_log_loss(recon_x, x)
    mu_squared = dy.pow(mu, dy.scalarInput(2))
    kl_terms = 1 + logvar - mu_squared - dy.exp(logvar)
    divergence = -0.5 * dy.sum_elems(kl_terms)
    return reconstruction + divergence
Esempio n. 18
0
 def log_sum_exp(scores):
     """Return ``log(sum(exp(scores)))`` with the maximum factored out.

     The maximum is located through ``npvalue`` and tiled
     ``self.tagset_size`` times before being subtracted from ``scores``.
     """
     best_index = np.argmax(scores.npvalue())
     peak = dy.pick(scores, best_index)
     tiled_peak = dy.concatenate([peak] * self.tagset_size)
     exp_shifted_t = dy.transpose(dy.exp(scores - tiled_peak))
     return peak + dy.log(dy.sum_dim(exp_shifted_t, [1]))
Esempio n. 19
0
 def calc_loss(self, policy):
     """Return the weighted negative entropy of a sequence of log-policies.

     Args:
         policy: iterable of log-probability expressions, one per step.

     Returns:
         ``None`` when ``self.weight`` is below ``1e-8``; otherwise
         ``self.weight`` times the sum over steps of ``sum(exp(ll) * ll)``,
         summed over the batch. When ``self.valid_pos`` is set, each step is
         first restricted to the batch elements listed in ``self.valid_pos[i]``.
     """
     if self.weight < 1e-8:
         return None

     terms = []
     for step, log_probs in enumerate(policy):
         if self.valid_pos is not None:
             log_probs = dy.pick_batch_elems(log_probs, self.valid_pos[step])
         weighted = dy.cmult(dy.exp(log_probs), log_probs)
         step_total = dy.sum_batches(dy.sum_elems(weighted))
         terms.append(dy.sum_batches(step_total))
     return self.weight * dy.esum(terms)
Esempio n. 20
0
 def log_sum_exp(scores):
     """Return ``log(sum(exp(scores)))`` using the max-shift trick.

     The maximum score is tiled ``self.n_tags + 2`` times and subtracted
     before exponentiating; it is added back after the logarithm.
     """
     peak = dynet.pick(scores, np.argmax(scores.npvalue()))
     tiled_peak = dynet.concatenate([peak] * (self.n_tags + 2))
     exp_shifted_t = dynet.transpose(dynet.exp(scores - tiled_peak))
     return peak + dynet.log(dynet.sum_cols(exp_shifted_t))
Esempio n. 21
0
def calc_loss(sent):
    """Return ``(kl_loss, softmax_loss)`` of the sequence VAE for one pair.

    ``sent[0]`` is encoded with the source LSTM, its final output is mapped to
    a mean and a log-variance, a latent ``z`` is sampled by reparameterization,
    and the target LSTM (seeded with ``z``) is scored against ``sent[1]``.
    """
    dy.renew_cg()

    source_words = sent[0]
    target_words = sent[1]

    # Final output of the source LSTM.
    source_inputs = [LOOKUP_SRC[w] for w in source_words]
    src_output = LSTM_SRC_BUILDER.initial_state().add_inputs(source_inputs)[-1].output()

    # Load the parameters of the two heads in the order mlp expects them.
    mean_params = [dy.parameter(p) for p in (W_mean_p, V_mean_p, b_mean_p)]
    var_params = [dy.parameter(p) for p in (W_var_p, V_var_p, b_var_p)]

    # Mean vector and diagonal log-variance produced by the encoder.
    mu = mlp(src_output, *mean_params)
    log_var = mlp(src_output, *var_params)

    # KL[N(u(x), sigma(x)) || N(0, I)]
    #   = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_inner = 1 + log_var - dy.pow(mu, dy.inputVector([2])) - dy.exp(log_var)
    kl_loss = -0.5 * dy.sum_elems(kl_inner)

    z = reparameterize(mu, log_var)

    # Step through the target sentence, predicting each word from the last.
    decoder = LSTM_TRG_BUILDER.initial_state().set_s([z, dy.tanh(z)])
    previous = target_words[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    word_losses = []
    for current in target_words[1:]:
        decoder = decoder.add_input(LOOKUP_TRG[previous])
        scores = dy.affine_transform([b_sm, W_sm, decoder.output()])
        word_losses.append(dy.pickneglogsoftmax(scores, current))
        previous = current

    softmax_loss = dy.esum(word_losses)
    return kl_loss, softmax_loss
def softmax(x):
    """Numerically stable softmax along dimension 1 of a matrix expression.

    The maximum over dimension 1 is subtracted before exponentiating; each
    entry is then divided by the sum of the exponentials in its row.
    """
    shifted = dy.colwise_add(x, -dy.max_dim(x, 1))
    numerators = dy.exp(shifted)
    # Broadcast the per-row sums to the shape of x via an all-zero matrix.
    denominators = dy.colwise_add(dy.zeroes(x.dim()[0]), dy.sum_cols(numerators))
    return dy.cdiv(numerators, denominators)
Esempio n. 23
0
def selu(x):
    """ Apply the SELU activation element-wise using rectifier primitives.

        For x > 0 the result is ``x * scale``; for x < 0 it is
        ``(alpha * exp(x) - alpha) * scale``. ``alpha``, ``scale`` and
        ``epsilon`` are read from the enclosing scope.

        :type x: dn.Expression
        :rtype: dn.Expression """
    pos_part = dn.rectify(x)
    # Close to 1 where x > 0 and exactly 0 elsewhere.
    is_positive = dn.rectify(dn.cdiv(pos_part, pos_part + epsilon))
    neg_part = -dn.rectify(-x)
    # exp(min(x, 0)) is 1 on the positive side; remove that contribution.
    exp_neg = dn.exp(neg_part) - is_positive
    neg_branch = exp_neg * alpha - alpha + is_positive * alpha
    return (pos_part + neg_branch) * scale
Esempio n. 24
0
 def marginals(self,
               inside_chart,
               outside_chart,
               lognormalizer,
               semiring=LogProbSemiring):
     """Return the marginal of every node in ``inside_chart``.

     For each node, the inside and outside scores are combined with
     ``semiring.product``, divided by ``lognormalizer`` with
     ``semiring.division``, and exponentiated.

     Returns:
         A dict mapping each node to its marginal expression.
     """
     return {
         node: dy.exp(
             semiring.division(
                 semiring.product(inside_chart[node], outside_chart[node]),
                 lognormalizer))
         for node in inside_chart
     }
Esempio n. 25
0
def loss_function(recon_x, x, mu, logvar):
    """Sum of the reconstruction loss and the KL divergence of a VAE.

    Reconstruction uses ``dy.binary_log_loss``. The KL term is
    ``-0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)``; see Appendix B of
    Kingma and Welling, Auto-Encoding Variational Bayes, ICLR 2014
    (https://arxiv.org/abs/1312.6114).
    """
    bce = dy.binary_log_loss(recon_x, x)
    squared_mean = dy.pow(mu, dy.scalarInput(2))
    variance = dy.exp(logvar)
    kld = -0.5 * dy.sum_elems(1 + logvar - squared_mean - variance)
    return bce + kld
Esempio n. 26
0
    def __call__(self, logsoftmaxes, mask):
        """Return the confidence penalty (scaled negative entropy).

        Args:
            logsoftmaxes: iterable of log-probability expressions.
            mask: ``None``, or an indexable whose ``i``-th entry is multiplied
                element-wise (as a batched tensor) into the ``i``-th term.

        Returns:
            The plain integer ``0`` when the current strength is zero;
            otherwise ``strength * sum_elems(sum_i exp(l_i) * l_i)``.
        """
        strength = self.strength.value()
        if strength == 0:
            return 0

        def masked_term(position, log_probs):
            # Negative entropy contribution of one distribution.
            term = dy.cmult(dy.exp(log_probs), log_probs)
            if mask is None:
                return term
            return dy.cmult(dy.inputTensor(mask[position], batched=True), term)

        terms = [masked_term(position, log_probs)
                 for position, log_probs in enumerate(logsoftmaxes)]
        return strength * dy.sum_elems(dy.esum(terms))
Esempio n. 27
0
def calc_reinforce_loss(words, tags, delta):
    """Build the mixed cross-entropy / REINFORCE objective for one sentence.

    Renews the computation graph, runs ``LSTM`` over the looked-up ``words``,
    scores every position with an affine layer, and combines plain scores
    with baseline-corrected, reward-weighted scores. A squared-error loss for
    the per-word reward baseline is built alongside.

    Args:
        words: sequence of indices into ``LOOKUP``.
        tags: sequence of gold tag indices, paired position-wise with ``words``.
        delta: controls how the two score lists are sliced and joined.

    Returns:
        The pair ``(dy.esum(mixer_scores), dy.esum(baseline_scores))``.
    """
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    word_reps = LSTM.transduce([LOOKUP[x] for x in words])

    # Softmax scores
    W = dy.parameter(W_sm)
    b = dy.parameter(b_sm)

    #calculate the probability distribution
    scores = [dy.affine_transform([b, W, x]) for x in word_reps]
    losses = [
        dy.pickneglogsoftmax(score, tag) for score, tag in zip(scores, tags)
    ]
    # NOTE(review): each `loss` is a single pickneglogsoftmax value, so this is
    # the negated exponential of one loss rather than a distribution over
    # tags; also confirm that the expression type exposes `as_array`.
    probs = [-dy.exp(loss).as_array() for loss in losses]

    #then take samples from the probability distribution
    # NOTE(review): `p=x` presumably needs a probability vector over tags --
    # confirm what `probs` is meant to contain.
    samples = [np.random.choice(range(len(x)), p=x) for x in probs]

    #calculate accuracy=reward
    correct = [sample == tag for sample, tag in zip(samples, tags)]
    r_i = float(sum(correct)) / len(correct)
    r = dy.constant((1), r_i)
    # Reward baseline for each word
    W_bl = dy.parameter(W_bl_p)
    b_bl = dy.parameter(b_bl_p)
    r_b = [
        dy.affine_transform([b_bl, W_bl, dy.nobackprop(x)]) for x in word_reps
    ]

    #we need to take the value in order to break the computation graph
    #as the reward portion is trained separately and not backpropagated through during the overall score
    rewards_over_baseline = [(r - dy.nobackprop(x)) for x in r_b]
    #the scores for training the baseline
    baseline_scores = [dy.square(r - x) for x in r_b]

    #then calculate the reinforce scores using reinforce
    # NOTE(review): the reward multiplies the raw affine `scores`, not the
    # per-tag `losses` computed above -- confirm this is intended.
    reinforce_scores = [
        r_s * score for r_s, score in zip(rewards_over_baseline, scores)
    ]

    #we want the first len(sent)-delta scores from xent then delta scores from reinforce
    #for mixer
    # NOTE(review): the second slice starts at `delta - 1`, which does not
    # obviously select "the last delta" entries -- confirm the indices.
    if len(scores) > delta:
        mixer_scores = scores[:len(scores) - delta] + reinforce_scores[delta -
                                                                       1:]
    else:
        mixer_scores = reinforce_scores
    return dy.esum(mixer_scores), dy.esum(baseline_scores)
Esempio n. 28
0
def log_sum_exp_dim_0(x):
    """Numerically stable log-sum-exp over dimension 0 of ``x``.

    The maximum along dimension 0 is subtracted before exponentiating and
    added back at the end. The sum is obtained as the mean along dimension 0
    multiplied by the size of that dimension.

    Returns:
        An expression of shape (dim_1,) per batch element, or (1,) when ``x``
        has a single dimension.
    """
    dims = x.dim()
    shape = dims[0]
    peak = dy.max_dim(x, 0)  # (dim_1, batch_size)
    if len(shape) == 1:
        tiled_peak = peak
    else:
        # Turn the maxima into a row and repeat it once per entry of dim 0.
        peak_row = dy.reshape(peak, (1, shape[1]), batch_size=dims[1])
        tiled_peak = dy.concatenate([peak_row] * shape[0])
    exp_shifted = dy.exp(x - tiled_peak)
    total = dy.mean_dim(exp_shifted, d=[0], b=False) * shape[0]
    return dy.log(total) + peak
Esempio n. 29
0
    def log_sum_exp(self, scores):
        """Return log(sum(exp(scores))) computed around the maximum score.

        :param scores: expression holding ``self.dim_ts_y + 2`` scores
        :return: expression for the log of the summed exponentials
        """
        # Locate the maximum via a forward pass and pick it as an expression,
        # so that the exponentials below are taken of non-positive values.
        best_index = np.argmax(scores.npvalue())
        peak = dy.pick(scores, best_index)
        tiled_peak = dy.concatenate([peak] * (self.dim_ts_y + 2))
        exp_shifted = dy.exp(scores - tiled_peak)
        # Add the maximum back after taking the logarithm of the shifted sum.
        return peak + dy.log(dy.sum_elems(dy.transpose(exp_shifted)))
    def backward(self, word_vectors, label):
        """Build the weighted cross-entropy loss and back-propagate it.

        The graph is renewed, ``word_vectors`` and ``label`` become input
        tensors, and the logit comes from ``self.build_graph``. No parameter
        update is performed here.

        Returns:
            The loss value computed before the backward pass.
        """
        dy.renew_cg()
        inputs = dy.inputTensor(word_vectors)
        targets = dy.inputTensor(label)
        logit = self.build_graph(inputs)

        # pos_weight is the extra weight given to positive samples.
        # For the formula see
        # https://www.tensorflow.org/api_docs/python/tf/nn/weighted_cross_entropy_with_logits
        pos_weight = 15
        factor = 1 + (pos_weight - 1) * targets
        stable_softplus = dy.log(1 + dy.exp(-dy.abs(logit))) + dy.rectify(-logit)
        loss = (1 - targets) * logit + factor * stable_softplus
        value = loss.value()
        loss.backward()
        return value
    def pz(self, eq):
        """Draw a Gumbel-softmax sample from the distribution over z.

        The cluster probabilities are ``softmax(W * eq)``. Entry ``z`` of the
        result is ``exp((log(pi_z) + g_z) / temp)`` divided by the sum of
        those terms over all clusters, where ``g`` is one draw from
        ``dy.random_gumbel`` and ``temp`` is ``self.temp``.

        Args:
            eq: input expression multiplied by the parameter ``self.W``.

        Returns:
            The concatenation of the ``self.num_clusters`` normalized entries.
        """
        W = dy.parameter(self.W)
        prob = dy.softmax(W * eq)
        gumbel = dy.random_gumbel(self.num_clusters)

        # Build each numerator once and reuse it for the normalizer and the
        # output. The same Gumbel draw feeds both, so the values are the same
        # as when the numerators were rebuilt in a second loop.
        numerators = [
            dy.exp((dy.log(prob[z]) + gumbel[z]) / self.temp)
            for z in range(self.num_clusters)
        ]
        denom = dy.esum(numerators)
        return dy.concatenate([dy.cdiv(num, denom) for num in numerators])
Esempio n. 32
0
    def calc_loss_basic(self, frames, label):
        """Compute the KL and reconstruction losses for one frame sequence.

        The frames are run through the LSTM, the final output is mapped to a
        mean and a log-variance by two separately parameterized MLP heads, a
        latent vector is sampled by reparameterization, and ``label`` is
        predicted from it with an affine layer.

        Args:
            frames: array-like passed to ``dy.inputTensor``; iterating the
                resulting expression yields one LSTM input per step.
            label: index of the target class.

        Returns:
            ``(kl_loss, recons_loss)``: the KL term against a standard normal
            and the negative log-softmax of ``label``.
        """
        # Renew the computation graph
        dy.renew_cg()

        # Initialize LSTM
        init_state_src = self.lstm_builder.initial_state()

        # Instantiate the params
        W_mean = dy.parameter(self.W_mean_p)
        V_mean = dy.parameter(self.V_mean_p)
        b_mean = dy.parameter(self.b_mean_p)
        W_var = dy.parameter(self.W_var_p)
        V_var = dy.parameter(self.V_var_p)
        b_var = dy.parameter(self.b_var_p)

        input_frames = dy.inputTensor(frames)

        # Get the LSTM embeddings
        src_output = init_state_src.add_inputs(
            [frame for frame in input_frames])[-1].output()

        # Get the mean and diagonal log covariance from the encoder. The
        # log-variance head must use its own parameters: reusing the mean
        # parameters, as the code previously did, made log_var identical to mu.
        mu = self.mlp(src_output, W_mean, V_mean, b_mean)
        log_var = self.mlp(src_output, W_var, V_var, b_var)

        # Compute the KL Divergence loss
        kl_loss = -0.5 * dy.sum_elems(1 + log_var -
                                      dy.pow(mu, dy.inputVector([2])) -
                                      dy.exp(log_var))

        # Reparameterize
        z = self.reparameterize(mu, log_var)

        W_sm = dy.parameter(self.W_sm_p)
        b_sm = dy.parameter(self.b_sm_p)

        # Calculate the reconstruction loss
        pred = dy.affine_transform([b_sm, W_sm, z])
        recons_loss = dy.pickneglogsoftmax(pred, label)

        return kl_loss, recons_loss
Esempio n. 33
0
def max_margin_weighting(instance, pred_states, pred_scores_v):
    """Weight predictions whose final state matches the instance's final state.

    Args:
        instance: object whose ``states[-1]`` is the reference final state.
        pred_states: one state sequence per prediction.
        pred_scores_v: one score per prediction.

    Returns:
        ``(weights, correct_denotations)``. ``correct_denotations`` lists the
        indices of matching predictions. ``weights`` is all zeros when nothing
        matches; otherwise it is the exponentiated log-softmax of the scores
        restricted to the matching indices.
    """
    pred_scores_v = np.array(pred_scores_v)
    assert len(pred_states) == len(pred_scores_v)
    # assert len(instance.states) == len(pred_states[0])

    correct_denotations = [
        index for index, states in enumerate(pred_states)
        if states[-1] == instance.states[-1]
    ]

    if correct_denotations:
        restricted = dy.log_softmax(dy.inputVector(pred_scores_v),
                                    correct_denotations)
        weights = dy.exp(restricted).npvalue()
    else:
        weights = np.zeros_like(pred_scores_v)

    return weights, correct_denotations
Esempio n. 34
0
def calc_reinforce_loss(words, tags, delta):
    """Build the mixed cross-entropy / REINFORCE objective for one sentence.

    Renews the computation graph, runs ``LSTM`` over the looked-up ``words``,
    scores every position with an affine layer, and combines plain scores
    with baseline-corrected, reward-weighted scores. A squared-error loss for
    the per-word reward baseline is built alongside.

    Args:
        words: sequence of indices into ``LOOKUP``.
        tags: sequence of gold tag indices, paired position-wise with ``words``.
        delta: controls how the two score lists are sliced and joined.

    Returns:
        The pair ``(dy.esum(mixer_scores), dy.esum(baseline_scores))``.
    """
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    word_reps = LSTM.transduce([LOOKUP[x] for x in words])

    # Softmax scores
    W = dy.parameter(W_sm)
    b = dy.parameter(b_sm)

    #calculate the probability distribution
    scores = [dy.affine_transform([b, W, x]) for x in word_reps]
    losses = [dy.pickneglogsoftmax(score, tag) for score, tag in zip(scores, tags)]
    # NOTE(review): each `loss` is a single pickneglogsoftmax value, so this is
    # the negated exponential of one loss rather than a distribution over
    # tags; also confirm that the expression type exposes `as_array`.
    probs = [-dy.exp(loss).as_array() for loss in losses]

    #then take samples from the probability distribution
    # NOTE(review): `p=x` presumably needs a probability vector over tags --
    # confirm what `probs` is meant to contain.
    samples = [np.random.choice(range(len(x)), p=x) for x in probs]

    #calculate accuracy=reward
    correct = [sample == tag for sample, tag in zip(samples, tags)]
    r_i = float(sum(correct))/len(correct)
    r = dy.constant((1), r_i)
    # Reward baseline for each word
    W_bl = dy.parameter(W_bl_p)
    b_bl = dy.parameter(b_bl_p)
    r_b = [dy.affine_transform([b_bl, W_bl, dy.nobackprop(x)]) for x in word_reps]

    #we need to take the value in order to break the computation graph
    #as the reward portion is trained separately and not backpropagated through during the overall score
    rewards_over_baseline = [(r - dy.nobackprop(x)) for x in r_b]
    #the scores for training the baseline
    baseline_scores = [dy.square(r - x) for x in r_b]

    #then calculate the reinforce scores using reinforce
    # NOTE(review): the reward multiplies the raw affine `scores`, not the
    # per-tag `losses` computed above -- confirm this is intended.
    reinforce_scores = [r_s*score for r_s, score in zip(rewards_over_baseline, scores)]

    #we want the first len(sent)-delta scores from xent then delta scores from reinforce
    #for mixer
    # NOTE(review): the second slice starts at `delta-1`, which does not
    # obviously select "the last delta" entries -- confirm the indices.
    if len(scores) > delta:
        mixer_scores = scores[:len(scores)-delta] + reinforce_scores[delta-1:]
    else:
        mixer_scores = reinforce_scores
    return dy.esum(mixer_scores), dy.esum(baseline_scores)
Esempio n. 35
0
  def sample(self, x: dy.Expression, n: numbers.Integral, temperature: numbers.Real=1.0):
    """Draw ``n`` indices (with replacement) from the predicted distribution.

    Args:
      x: input expression handed to ``self.calc_log_probs``.
      n: number of samples to draw.
      temperature: must be non-zero. When it differs from 1.0 the log-probs
        are divided by it and renormalized with a softmax before sampling.

    Returns:
      A list of ``(index, score_expression)`` pairs. The score is picked from
      the log-prob expression, which is the temperature-scaled one whenever
      ``temperature != 1.0``.
    """
    assert temperature != 0.0
    scores_expr = self.calc_log_probs(x)
    if temperature == 1.0:
      probs = dy.exp(scores_expr).npvalue()
    else:
      scores_expr *= 1.0 / temperature
      probs = dy.softmax(scores_expr).npvalue()

    # np.random.choice rejects probabilities whose sum is off by even 1e-8,
    # so renormalize explicitly.
    probs /= sum(probs)

    candidates = range(probs.shape[0])
    drawn = np.random.choice(candidates, (n,), replace=True, p=probs)
    return [(word, dy.pick(scores_expr, word)) for word in drawn]