Example #1
    def _embed_word(self, segmented_word, is_batched):
        if self.word_vocab is not None:
            ngram_stats = self.to_ngram_stats(segmented_word.word)
        elif self.char_vocab is not None:
            ngram_stats = self.to_ngram_stats(segmented_word.chars)
        else:
            raise ValueError(
                "Either word_vocab or char_vocab must be provided (both are None)")

        not_in = [
            key for key in ngram_stats.keys()
            if key not in self.ngram_vocab.w2i
        ]
        for key in not_in:
            ngram_stats.pop(key)

        if len(ngram_stats) > 0:
            ngrams = [
                self.ngram_vocab.convert(ngram)
                for ngram in ngram_stats.keys()
            ]
            counts = list(ngram_stats.values())
        else:
            ngrams = [self.ngram_vocab.UNK]
            counts = [1]

        input_tensor = dy.sparse_inputTensor([ngrams], counts,
                                             (self.ngram_vocab.vocab_size(), ))
        # Note: If one wants to use CHARAGRAM embeddings, use NonLinear with ReLU.
        return self.transform.transform(input_tensor)
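
A minimal standalone reading of that CHARAGRAM note, in plain DyNet: a ReLU over a linear map of the sparse n-gram count vector. All sizes, parameter names, and counts below are hypothetical, not taken from the source.

import dynet as dy

dy.renew_cg()
pc = dy.ParameterCollection()
ngram_vocab_size, emb_dim = 8, 5                 # hypothetical sizes
W = pc.add_parameters((emb_dim, ngram_vocab_size))
b = pc.add_parameters((emb_dim,))
ngram_counts = {1: 2.0, 4: 1.0, 6: 3.0}          # ngram id -> count (assumed)
vec = dy.sparse_inputTensor([list(ngram_counts.keys())],
                            list(ngram_counts.values()),
                            (ngram_vocab_size,))
charagram_emb = dy.rectify(dy.parameter(W) * vec + dy.parameter(b))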
Example #2
 def transduce(self, inputs):
   ngrams = [self.convert(ngram) for ngram in self.word_vect.keys()]
   counts = list(self.word_vect.values())
   if len(ngrams) != 0:
     ngram_vocab_vect = dy.sparse_inputTensor([ngrams], counts, (self.dict_entry,))
     return dy.rectify(self.embedding.transform(ngram_vocab_vect))
   else:
     return None
Example #3
 def _choose_rnn_input(self, dec_state, batch_size, prev_ref_action, mode):
     hidden_size = dec_state.context.dim()[0][0]
     vocab_size = self.trg_embedder.vocab_size
     if mode == "context":
         context_vec = dy.reshape(dec_state.context, (hidden_size, ),
                                  batch_size=batch_size)
         ret = context_vec
     elif dec_state.out_prob is None and prev_ref_action is None:
         ret = None
     elif mode == "expected":
         ret = dy.reshape(dec_state.out_prob, (1, vocab_size),
                          batch_size=batch_size) * dy.parameter(
                              self.trg_embedder.embeddings)
         ret = dy.reshape(ret, (hidden_size, ), batch_size=batch_size)
     elif mode in ["argmax", "argmax_st"]:
         gradient_mode = "zero_gradient" if mode == "argmax" else "straight_through_gradient"
         argmax = dy.reshape(dy.argmax(dec_state.out_prob,
                                       gradient_mode=gradient_mode),
                             (1, vocab_size),
                             batch_size=batch_size)
         ret = argmax * dy.parameter(self.trg_embedder.embeddings)
         ret = dy.reshape(ret, (hidden_size, ), batch_size=batch_size)
     elif mode in ["teacher", "split"]:
          do_sample = (self.train and dec_state.out_prob
                       and self.sampling_prob > 0.0
                       and random.random() < self.sampling_prob)
         if not do_sample:
             ret = self.trg_embedder.embed(prev_ref_action)
         else:  # do sample
             sampled_vals = []
             npval = dec_state.out_prob.npvalue()
             for bi in range(batch_size):
                 npval_bi = npval[:, bi] if batch_size > 1 else npval
                 sampled_vals.append(
                     np.random.choice(vocab_size,
                                      p=npval_bi / np.sum(npval_bi)))
             idxs = ([], [])
             for batch_i in range(batch_size):
                 idxs[0].append(sampled_vals[batch_i])
                 idxs[1].append(batch_i)
             argmax = dy.sparse_inputTensor(
                 idxs,
                 values=np.ones(batch_size),
                 shape=(vocab_size, batch_size),
                 batched=True,
             )
             argmax = dy.reshape(argmax, (1, vocab_size),
                                 batch_size=batch_size)
             ret = argmax * dy.parameter(self.trg_embedder.embeddings)
             ret = dy.reshape(ret, (hidden_size, ), batch_size=batch_size)
     else:
         raise ValueError(f"unknown value for mode: {mode}")
      if ret is not None:
          self._chosen_rnn_inputs.append(ret)
     return ret
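
The sampled branch above builds batched one-hot vectors with dy.sparse_inputTensor and turns them into embeddings by matrix product. A stripped-down sketch of just that trick; the sizes, the embedding parameter E, and the sampled ids are assumptions for illustration:

import numpy as np
import dynet as dy

dy.renew_cg()
pc = dy.ParameterCollection()
vocab_size, emb_dim, batch_size = 6, 4, 3        # hypothetical sizes
E = pc.add_parameters((vocab_size, emb_dim))     # rows are word embeddings
sampled = [2, 0, 5]                              # one sampled id per batch element
idxs = ([], [])
for batch_i, tok in enumerate(sampled):
    idxs[0].append(tok)                          # vocab coordinate
    idxs[1].append(batch_i)                      # batch coordinate
one_hot = dy.sparse_inputTensor(idxs, values=np.ones(batch_size),
                                shape=(vocab_size, batch_size), batched=True)
one_hot = dy.reshape(one_hot, (1, vocab_size), batch_size=batch_size)
emb = one_hot * dy.parameter(E)                  # (1, emb_dim) per batch element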
Example #4
 def on_start_sent(self, src):
   self.coeff = None
   self.dict_prob = None
   
   batch_size = src.batch_size()
   col_size = src.sent_len()
 
  idxs = [(x, j, i) for i in range(batch_size) for j in range(col_size)
          for x in self.lexicon[src[i][j]].keys()]
  idxs = tuple(map(list, list(zip(*idxs))))

  values = [x for i in range(batch_size) for j in range(col_size)
            for x in self.lexicon[src[i][j]].values()]
  dim = (len(self.trg_vocab), col_size, batch_size)
   self.lexicon_prob = dy.nobackprop(dy.sparse_inputTensor(idxs, values, dim, batched=True))
Example #5
 def test_sparse_inputTensor(self):
     dy.renew_cg()
     input_tensor = self.input_vals.reshape((3, 3, 3, 3))
     input_vals = [input_tensor[0, 0, 0, 0], input_tensor[0, 1, 2, 0]]
     input_indices = ([0, 0], [0, 1], [0, 2], [0, 0])
     x = dy.sparse_inputTensor(input_indices,
                               input_vals, (3, 3, 3, 3),
                               batched=True)
     self.assertEqual(x.dim()[0], (3, 3, 3), msg="Dimension mismatch")
     self.assertEqual(x.dim()[1], 3, msg="Dimension mismatch")
     self.assertTrue(np.allclose(x.npvalue()[0, 0, 0, 0], input_vals[0]),
                     msg="Expression value different from initial value")
     self.assertTrue(np.allclose(x.npvalue()[0, 1, 2, 0], input_vals[1]),
                     msg="Expression value different from initial value")
     self.assertTrue(np.allclose(x.npvalue()[1, 1, 1, 1], 0),
                     msg="Expression value different from initial value")
Example #6
    def transduce(self, inputs):
        batch_size = len(self.words)
        word_vects = []
        for word in self.words:
            word_vects.append(self.to_word_vector(word))
        idxs = [(x, i) for i in range(batch_size)
                for x in word_vects[i].keys()]
        idxs = tuple(map(list, list(zip(*idxs))))

        values = [x for i in range(batch_size) for x in word_vects[i].values()]
        ngram_vocab_vect = dy.sparse_inputTensor(
            idxs, values, (self.dict_entry, len(self.words)), batched=True)

        return dy.rectify(self.word_ngram(ngram_vocab_vect))
Example #7
    def on_start_sent(self, src):
        batch_size = len(src)
        col_size = len(src[0])

        idxs = [(x, j, i) for i in range(batch_size) for j in range(col_size)
                for x in self.lexicon[src[i][j]].keys()]
        idxs = tuple(map(list, list(zip(*idxs))))

        values = [
            x for i in range(batch_size) for j in range(col_size)
            for x in self.lexicon[src[i][j]].values()
        ]
        self.lexicon_prob = dy.nobackprop(
            dy.sparse_inputTensor(idxs,
                                  values,
                                  (len(self.trg_vocab), col_size, batch_size),
                                  batched=True))
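
Examples #4 and #7 share the same transpose trick: enumerate (target-word, column, batch) coordinate triples, then zip them into the per-dimension lists dy.sparse_inputTensor expects. A toy version with a hypothetical two-word lexicon and invented probabilities:

import dynet as dy

dy.renew_cg()
trg_vocab_size, col_size, batch_size = 4, 2, 2   # hypothetical sizes
lexicon = {0: {1: 0.7, 2: 0.3}, 1: {3: 1.0}}     # src word id -> {trg word id: prob}
src = [[0, 1], [1, 0]]                           # batch of source id sequences
idxs = [(x, j, i) for i in range(batch_size) for j in range(col_size)
        for x in lexicon[src[i][j]].keys()]
idxs = tuple(map(list, zip(*idxs)))              # per-dimension coordinate lists
values = [p for i in range(batch_size) for j in range(col_size)
          for p in lexicon[src[i][j]].values()]
lexicon_prob = dy.nobackprop(dy.sparse_inputTensor(
    idxs, values, (trg_vocab_size, col_size, batch_size), batched=True))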
Example #9
  def calc_nll(self, src, trg):
    event_trigger.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src)
    encodings = self.encoder.transduce(embeddings)
    if not batchers.is_batched(trg): trg = batchers.mark_as_batch([trg])

    if self.mode in ["avg_mlp", "final_mlp"]:
      if self.mode=="avg_mlp":
        if encodings.mask:
          encoding_fixed_size = dy.cdiv(
              dy.sum_dim(encodings.as_tensor(), [1]),
              dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
        else:
          encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) / encodings.dim()[0][1]
      elif self.mode=="final_mlp":
        encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
      scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
    elif self.mode=="lin_sum_sig":
      enc_lin = []
      for step_i, enc_i in enumerate(encodings):
        step_linear = self.output_layer.transform(enc_i)
        if encodings.mask and np.sum(encodings.mask.np_arr[:,step_i])>0:
          step_linear = dy.cmult(step_linear, dy.inputTensor(1.0 - encodings.mask.np_arr[:,step_i], batched=True))
        enc_lin.append(step_linear)
      if encodings.mask:
        encoding_fixed_size = dy.cdiv(dy.esum(enc_lin),
                                      dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
      else:
        encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
      scores = dy.logistic(encoding_fixed_size)

    else: raise ValueError(f"unknown mode '{self.mode}'")

    idxs = ([], [])
    for batch_i in range(trg.batch_size()):
      for word in set(trg[batch_i]):
        if word not in {vocabs.Vocab.ES, vocabs.Vocab.SS}:
          idxs[0].append(word)
          idxs[1].append(batch_i)
    trg_scores = dy.sparse_inputTensor(idxs, values=np.ones(len(idxs[0])),
                                       shape=scores.dim()[0] + (scores.dim()[1],),
                                       batched=True)
    loss_expr = dy.binary_log_loss(scores, trg_scores)
    return loss_expr
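
The trg_scores construction at the end builds a batched multi-hot vector over the vocabulary, with one 1 per distinct target word per batch element. The same trick in isolation, with hypothetical sizes and word ids:

import numpy as np
import dynet as dy

dy.renew_cg()
vocab_size, batch_size = 6, 2                    # hypothetical sizes
trg = [[1, 3], [2, 2, 5]]                        # target word ids per batch element
idxs = ([], [])
for batch_i in range(batch_size):
    for word in set(trg[batch_i]):
        idxs[0].append(word)                     # vocab coordinate
        idxs[1].append(batch_i)                  # batch coordinate
multi_hot = dy.sparse_inputTensor(idxs, values=np.ones(len(idxs[0])),
                                  shape=(vocab_size, batch_size), batched=True)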
Example #10
def calc_loss(sent, epsilon=0.0):
    #dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src = sent[0]
    tags = sent[1]

    # initialize the LSTM
    init_state_src = lstm_encode.initial_state()

    # get the output of the first LSTM
    src_output = init_state_src.add_inputs([embed[x]
                                            for x in src])[-1].output()

    # Now compute mean and standard deviation of source hidden state.
    W_mu_tweet = dy.parameter(W_mu_tweet_p)
    V_mu_tweet = dy.parameter(V_mu_tweet_p)
    b_mu_tweet = dy.parameter(b_mu_tweet_p)

    W_sig_tweet = dy.parameter(W_sig_tweet_p)
    V_sig_tweet = dy.parameter(V_sig_tweet_p)
    b_sig_tweet = dy.parameter(b_sig_tweet_p)

    # Compute tweet encoding
    mu_tweet = dy.dropout(mlp(src_output, W_mu_tweet, V_mu_tweet, b_mu_tweet),
                          DROPOUT)
    log_var_tweet = dy.dropout(
        mlp(src_output, W_sig_tweet, V_sig_tweet, b_sig_tweet), DROPOUT)

    W_mu_tag = dy.parameter(W_mu_tag_p)
    V_mu_tag = dy.parameter(V_mu_tag_p)
    b_mu_tag = dy.parameter(b_mu_tag_p)

    W_sig_tag = dy.parameter(W_sig_tag_p)
    V_sig_tag = dy.parameter(V_sig_tag_p)
    b_sig_tag = dy.parameter(b_sig_tag_p)

    # Compute tag encoding
    tags_tensor = dy.sparse_inputTensor([tags], np.ones((len(tags), )),
                                        (NUM_TAGS, ))

    mu_tag = dy.dropout(mlp(tags_tensor, W_mu_tag, V_mu_tag, b_mu_tag),
                        DROPOUT)
    log_var_tag = dy.dropout(mlp(tags_tensor, W_sig_tag, V_sig_tag, b_sig_tag),
                             DROPOUT)

    # Combine encodings for mean and diagonal covariance
    W_mu = dy.parameter(W_mu_p)
    b_mu = dy.parameter(b_mu_p)

    W_sig = dy.parameter(W_sig_p)
    b_sig = dy.parameter(b_sig_p)

    # Slowly phase out getting both inputs
    if random.random() < epsilon:
        mask = dy.zeros(HIDDEN_DIM)
    else:
        mask = dy.ones(HIDDEN_DIM)

    if random.random() < 0.5:
        mu_tweet = dy.cmult(mu_tweet, mask)
        log_var_tweet = dy.cmult(log_var_tweet, mask)
    else:
        mu_tag = dy.cmult(mu_tag, mask)
        log_var_tag = dy.cmult(log_var_tag, mask)

    mu = dy.affine_transform([b_mu, W_mu, dy.concatenate([mu_tweet, mu_tag])])
    log_var = dy.affine_transform(
        [b_sig, W_sig,
         dy.concatenate([log_var_tweet, log_var_tag])])

    # KL-Divergence loss computation
    kl_loss = -0.5 * dy.sum_elems(1 + log_var -
                                  dy.pow(mu, dy.inputVector([2])) -
                                  dy.exp(log_var))

    z = reparameterize(mu, log_var)

    # now step through the output sentence
    all_losses = []

    current_state = lstm_decode.initial_state().set_s([z, dy.tanh(z)])
    prev_word = src[0]
    W_sm = dy.parameter(W_tweet_softmax_p)
    b_sm = dy.parameter(b_tweet_softmax_p)

    for next_word in src[1:]:
        # feed the previous word into the decoder LSTM

        current_state = current_state.add_input(embed[prev_word])
        output_embedding = current_state.output()

        s = dy.affine_transform([b_sm, W_sm, output_embedding])

        all_losses.append(dy.pickneglogsoftmax(s, next_word))

        # Slowly phase out teacher forcing (this may be slow??)
        if random.random() < epsilon:
            p = dy.softmax(s).npvalue()
            prev_word = np.random.choice(VOCAB_SIZE, p=p / p.sum())
        else:
            prev_word = next_word

    softmax_loss = dy.esum(all_losses)

    W_hidden = dy.parameter(W_hidden_p)
    b_hidden = dy.parameter(b_hidden_p)

    W_out = dy.parameter(W_tag_output_p)
    b_out = dy.parameter(b_tag_output_p)

    h = dy.dropout(dy.tanh(b_hidden + W_hidden * z), DROPOUT)
    o = dy.logistic(b_out + W_out * h)

    crossentropy_loss = dy.binary_log_loss(o, tags_tensor)

    return kl_loss, softmax_loss, crossentropy_loss
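
Both this example and the next call a reparameterize helper that is not shown. A standard VAE reparameterization in DyNet might look like the sketch below; the helper body is an assumption, not code from the source:

def reparameterize(mu, log_var):
    # z = mu + sigma * eps with eps ~ N(0, I) and sigma = exp(log_var / 2)
    eps = dy.random_normal(mu.dim()[0])
    return mu + dy.cmult(dy.exp(log_var * 0.5), eps)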
Example #11
def hallucinate_tweet(given_tags):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    tags = given_tags

    # initialize the LSTM
    #init_state_src = lstm_encode.initial_state()

    # get the output of the first LSTM
    #src_output = init_state_src.add_inputs([embed[x] for x in src])[-1].output()

    # Now compute mean and standard deviation of source hidden state.
    #W_mu_tweet = dy.parameter(W_mu_tweet_p)
    #V_mu_tweet = dy.parameter(V_mu_tweet_p)
    #b_mu_tweet = dy.parameter(b_mu_tweet_p)

    #W_sig_tweet = dy.parameter(W_sig_tweet_p)
    #V_sig_tweet = dy.parameter(V_sig_tweet_p)
    #b_sig_tweet = dy.parameter(b_sig_tweet_p)

    # Compute tweet encoding
    #mu_tweet      = mlp(src_output, W_mu_tweet,  V_mu_tweet,  b_mu_tweet)
    #log_var_tweet = mlp(src_output, W_sig_tweet, V_sig_tweet, b_sig_tweet)

    W_mu_tag = dy.parameter(W_mu_tag_p)
    V_mu_tag = dy.parameter(V_mu_tag_p)
    b_mu_tag = dy.parameter(b_mu_tag_p)

    W_sig_tag = dy.parameter(W_sig_tag_p)
    V_sig_tag = dy.parameter(V_sig_tag_p)
    b_sig_tag = dy.parameter(b_sig_tag_p)

    # Compute tag encoding
    tags_tensor = dy.sparse_inputTensor([tags], np.ones((len(tags), )),
                                        (NUM_TAGS, ))

    mu_tag = dy.dropout(mlp(tags_tensor, W_mu_tag, V_mu_tag, b_mu_tag),
                        DROPOUT)
    log_var_tag = dy.dropout(mlp(tags_tensor, W_sig_tag, V_sig_tag, b_sig_tag),
                             DROPOUT)

    # Combine encodings for mean and diagonal covariance
    W_mu = dy.parameter(W_mu_p)
    b_mu = dy.parameter(b_mu_p)

    W_sig = dy.parameter(W_sig_p)
    b_sig = dy.parameter(b_sig_p)

    mu_tweet = dy.zeros(HIDDEN_DIM)
    log_var_tweet = dy.zeros(HIDDEN_DIM)

    mu = dy.affine_transform([b_mu, W_mu, dy.concatenate([mu_tweet, mu_tag])])
    log_var = dy.affine_transform(
        [b_sig, W_sig,
         dy.concatenate([log_var_tweet, log_var_tag])])

    # KL-Divergence loss computation
    kl_loss = -0.5 * dy.sum_elems(1 + log_var -
                                  dy.pow(mu, dy.inputVector([2])) -
                                  dy.exp(log_var))

    z = reparameterize(mu, log_var)

    # now step through the output sentence
    all_losses = []

    current_state = lstm_decode.initial_state().set_s([z, dy.tanh(z)])
    prev_word = vocab[START]
    W_sm = dy.parameter(W_tweet_softmax_p)
    b_sm = dy.parameter(b_tweet_softmax_p)

    gen_tweet = []
    for i in range(20):
        # feed the previous word into the decoder LSTM
        current_state = current_state.add_input(embed[prev_word])
        output_embedding = current_state.output()

        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        p = dy.softmax(s).npvalue()
        next_word = np.random.choice(VOCAB_SIZE, p=p / p.sum())
        gen_tweet.append(next_word)
        prev_word = next_word

    return gen_tweet
Example #12
 def transduce(self, inputs):
   keys = list(self.word_vect.keys())
   values = list(self.word_vect.values())
   ngram_vocab_vect = dy.sparse_inputTensor([keys], values, (self.dict_entry,))
   return dy.rectify(self.word_ngram.transform(ngram_vocab_vect))