Example #1
 def add_input(self, dec_state: RNNGDecoderState,
               actions: List[sent.RNNGAction]):
     action = actions[0] if batchers.is_batched(actions) else actions
     action_type = action.action_type
     if action_type == sent.RNNGAction.Type.GEN:
         # Shifting the embedding of a word
         if self.shift_from_enc:
             # Feed in the decoder based on input string
             return self._perform_gen(dec_state,
                                      self.sent_enc[dec_state.word_read])
         else:
             # Feed in the decoder based on the previously generated output / oracle output
             return self._perform_gen(
                 dec_state,
                 self.term_embedder.embed(action.action_content),
                 finish_generating=action.action_content == vocabs.Vocab.ES)
     elif action_type == sent.RNNGAction.Type.REDUCE_LEFT or \
          action_type == sent.RNNGAction.Type.REDUCE_RIGHT:
         # Perform a reduce in the left or right direction
         return self._perform_reduce(
             dec_state, action_type == sent.RNNGAction.Type.REDUCE_LEFT,
             action.action_content)
     elif action_type == sent.RNNGAction.Type.NT:
         # Shifting the embedding of the NT's head
         return self._perform_nt(dec_state, action.action_content)
     elif action_type == sent.RNNGAction.Type.REDUCE_NT:
         return self._perform_reduce_nt(dec_state)
     elif action_type == sent.RNNGAction.Type.NONE:
         return dec_state
     else:
         raise NotImplementedError("Unimplemented for action word:", action)
Example #2
    def calc_nll(self, src: Union[batchers.Batch, sent.Sentence], trg: Union[batchers.Batch, sent.Sentence]) \
            -> tt.Tensor:
        if not batchers.is_batched(src):
            src = batchers.ListBatch([src])

        src_inputs = batchers.ListBatch(
            [s[:-1] for s in src],
            mask=batchers.Mask(src.mask.np_arr[:, :-1]) if src.mask else None)
        src_targets = batchers.ListBatch(
            [s[1:] for s in src],
            mask=batchers.Mask(src.mask.np_arr[:, 1:]) if src.mask else None)

        event_trigger.start_sent(src)
        embeddings = self.src_embedder.embed_sent(src_inputs)
        encodings = self.rnn.transduce(embeddings)
        encodings_tensor = encodings.as_tensor()

        encoding_reshaped = tt.merge_time_batch_dims(encodings_tensor)
        seq_len = tt.sent_len(encodings_tensor)
        batch_size = tt.batch_size(encodings_tensor)

        outputs = self.transform.transform(encoding_reshaped)

        ref_action = np.asarray([s.words for s in src_targets]).reshape(
            (seq_len * batch_size, ))
        loss_expr_perstep = self.scorer.calc_loss(
            outputs, batchers.mark_as_batch(ref_action))

        loss_expr_perstep = tt.unmerge_time_batch_dims(loss_expr_perstep,
                                                       batch_size)

        loss = tt.aggregate_masked_loss(loss_expr_perstep, src_targets.mask)

        return loss
Example #3
 def _initial_state(self, src):
     if batchers.is_batched(src):
         src = src[0]
     if type(src) == sent.CompoundSentence:
         src = src.sents[0]
     self.src_encoding = self.encoder.transduce(
         self.src_embedder.embed_sent(src))
     return SimultaneousState(self, encoder_state=None, decoder_state=None)
Example #4
 def _encode_src(self, src: Union[batchers.Batch, sent.Sentence]):
   embeddings = self.src_embedder.embed_sent(src)
   encoding = self.encoder.transduce(embeddings)
   final_state = self.encoder.get_final_states()
   self.attender.init_sent(encoding)
   ss = batchers.mark_as_batch([Vocab.SS] * src.batch_size()) if batchers.is_batched(src) else Vocab.SS
   initial_state = self.decoder.initial_state(final_state, self.trg_embedder.embed(ss))
   return initial_state
Example #5
 def __len__(self):
     if self.expr_list or self.expr_tensor:
         return super(LazyNumpyExpressionSequence, self).__len__()
     else:
         if batchers.is_batched(self.lazy_data):
             return self.lazy_data[0].get_array().shape[1]
         else:
             return self.lazy_data.get_array().shape[1]
Example #6
  def calc_loss(self, src, trg, infer_prediction=False):
    event_trigger.start_sent(src)
    if not batchers.is_batched(src):
      src = batchers.mark_as_batch([src])
    if not batchers.is_batched(trg):
      trg = batchers.mark_as_batch([trg])
    src_words = np.array([[Vocab.SS] + x.words for x in src])
    batch_size, src_len = src_words.shape

    if src.mask is None:
      src_mask = np.zeros((batch_size, src_len), dtype=int)
    else:
      src_mask = np.concatenate([np.zeros((batch_size, 1), dtype=int), src.mask.np_arr.astype(int)], axis=1)

    src_embeddings = self.sentence_block_embed(self.src_embedder.embeddings, src_words, src_mask)
    src_embeddings = self.make_input_embedding(src_embeddings, src_len)

    trg_words = np.array(list(map(lambda x: [Vocab.SS] + x.words[:-1], trg)))
    batch_size, trg_len = trg_words.shape

    if trg.mask is None:
      trg_mask = np.zeros((batch_size, trg_len), dtype=int)
    else:
      trg_mask = trg.mask.np_arr.astype(int)

    trg_embeddings = self.sentence_block_embed(self.trg_embedder.embeddings, trg_words, trg_mask)
    trg_embeddings = self.make_input_embedding(trg_embeddings, trg_len)

    xx_mask = self.make_attention_mask(src_mask, src_mask)
    xy_mask = self.make_attention_mask(trg_mask, src_mask)
    yy_mask = self.make_attention_mask(trg_mask, trg_mask)
    yy_mask *= self.make_history_mask(trg_mask)

    z_blocks = self.encoder.transduce(src_embeddings, xx_mask)
    h_block = self.decoder(trg_embeddings, z_blocks, xy_mask, yy_mask)

    if infer_prediction:
      y_len = h_block.dim()[0][1]
      last_col = dy.pick(h_block, dim=1, index=y_len - 1)
      logits = self.decoder.output(last_col)
      return logits

    ref_list = list(itertools.chain.from_iterable(map(lambda x: x.words, trg)))
    concat_t_block = (1 - trg_mask.ravel()).reshape(-1) * np.array(ref_list)
    loss = self.decoder.output_and_loss(h_block, concat_t_block)
    return FactoredLossExpr({"mle": loss})
Example #7
    def generate(
            self,
            src: batchers.Batch,
            search_strategy: search_strategies.SearchStrategy,
            forced_trg_ids: batchers.Batch = None) -> Sequence[sent.Sentence]:
        event_trigger.start_sent(src)
        if not batchers.is_batched(src):
            src = batchers.mark_as_batch([src])
        outputs = []

        trg = sent.SimpleSentence([0])

        if not batchers.is_batched(trg):
            trg = batchers.mark_as_batch([trg])

        output_actions = []
        score = 0.

        # TODO Fix this with generate_one_step and use the appropriate search_strategy
        self.max_len = 100  # This is a temporary hack
        for _ in range(self.max_len):
            dy.renew_cg(immediate_compute=settings.IMMEDIATE_COMPUTE,
                        check_validity=settings.CHECK_VALIDITY)
            log_prob_tail = self.calc_loss(src,
                                           trg,
                                           loss_cal=None,
                                           infer_prediction=True)
            ys = np.argmax(log_prob_tail.npvalue(), axis=0).astype('i')
            if ys == vocabs.Vocab.ES:
                output_actions.append(ys)
                break
            output_actions.append(ys)
            trg = sent.SimpleSentence(words=output_actions + [0])
            if not batchers.is_batched(trg):
                trg = batchers.mark_as_batch([trg])

        # Append output to the outputs
        if hasattr(self, "trg_vocab") and self.trg_vocab is not None:
            outputs.append(
                sent.SimpleSentence(words=output_actions,
                                    vocab=self.trg_vocab))
        else:
            outputs.append((output_actions, score))

        return outputs
Example #8
 def _select_ref_words(sent, index, truncate_masked=False):
   if truncate_masked:
     mask = sent.mask if batchers.is_batched(sent) else None
     if not batchers.is_batched(sent):
       return sent[index]
     else:
       ret = []
       found_masked = False
       for (j, single_trg) in enumerate(sent):
         if mask is None or mask.np_arr[j, index] == 0 or np.sum(mask.np_arr[:, index]) == mask.np_arr.shape[0]:
           assert not found_masked, "sentences must be sorted by decreasing target length"
           ret.append(single_trg[index])
         else:
           found_masked = True
       return batchers.mark_as_batch(ret)
   else:
     if not batchers.is_batched(sent): return sent[index]
     else: return batchers.mark_as_batch([single_trg[index] for single_trg in sent])
Example #9
 def embed_factor_sent(self, x, speech_len):
     # single mode
     if not batchers.is_batched(x):
         embeddings = [self.embed_factor(word) for word in x]
     # minibatch mode
     else:
         embeddings = []
         seq_len = x.sent_len()
         for single_sent in x:
             assert single_sent.sent_len() == seq_len
         #      for word_i in range(seq_len):
         for word_i in range(speech_len):
             batch = batchers.mark_as_batch(
                 [single_sent[word_i] for single_sent in x])
             embeddings.append(self.embed_factor(batch))
     return expression_seqs.ExpressionSequence(
         expr_list=embeddings,
         mask=x.mask if batchers.is_batched(x) else None)
Example #10
 def __getitem__(self, key):
   if self.expr_list or self.expr_tensor:
     return super().__getitem__(key)
   else:
     if batchers.is_batched(self.lazy_data):
       return dy.inputTensor(
         [self.lazy_data[batch].get_array()[:, key] for batch in range(self.lazy_data.batch_size())], batched=True)
     else:
       return dy.inputTensor(self.lazy_data.get_array()[:,key], batched=False)
Example #11
    def calc_nll(self, src: Union[batchers.Batch, sent.Sentence],
                 trg: Union[batchers.Batch, sent.Sentence]) -> LossExpr:
        if isinstance(src, batchers.CompoundBatch):
            src = src.batches[0]
        # Encode the sentence
        initial_state = self._initial_state(src)

        dec_state = initial_state
        trg_mask = trg.mask if batchers.is_batched(trg) else None
        cur_losses = []
        seq_len = trg.sent_len()

        # Sanity check if requested
        if settings.CHECK_VALIDITY and batchers.is_batched(src):
            for j, single_trg in enumerate(trg):
                # assert consistent length
                assert single_trg.sent_len() == seq_len
                # assert exactly one unmasked ES token
                assert 1 == len([
                    i for i in range(seq_len)
                    if (trg_mask is None or trg_mask.np_arr[j, i] == 0)
                    and single_trg[i] == vocabs.Vocab.ES
                ])

        input_word = None
        for i in range(seq_len):
            ref_word = self._select_ref_words(
                trg, i, truncate_masked=self.truncate_dec_batches)

            if input_word is not None:
                dec_state = self.decoder.add_input(dec_state, input_word)
            rnn_output = dec_state.as_vector()
            dec_state.context = self.attender.calc_context(rnn_output)
            word_loss = self.decoder.calc_loss(dec_state, ref_word)

            if not self.truncate_dec_batches and batchers.is_batched(
                    src) and trg_mask is not None:
                word_loss = trg_mask.cmult_by_timestep_expr(word_loss,
                                                            i,
                                                            inverse=True)
            cur_losses.append(word_loss)
            input_word = ref_word
        units = [t.len_unpadded() for t in trg]
        return LossExpr(dy.esum(cur_losses), units)
Example #12
 def embed(self, x: Union[numbers.Integral, batchers.Batch]) -> tt.Tensor:
     if self.train and self.word_dropout > 0.0 and self.word_id_mask is None:
         batch_size = x.batch_size() if batchers.is_batched(x) else 1
         self.word_id_mask = [
             set(
                 np.random.choice(self.vocab_size,
                                  int(self.vocab_size * self.word_dropout),
                                  replace=False)) for _ in range(batch_size)
         ]
     # single mode
     if not batchers.is_batched(x):
         if self.train and self.word_id_mask and x in self.word_id_mask[0]:
             ret = tt.zeroes(hidden_dim=self.emb_dim)
         else:
             ret = self.embeddings(
                 torch.tensor(x, dtype=torch.long).to(xnmt.device))
             ret = ret.unsqueeze(0)
             if self.fix_norm is not None:
                 ret = torch.div(ret, torch.norm(ret))
                 if self.fix_norm != 1:
                     ret = torch.mul(ret, self.fix_norm)
     # minibatch mode
     else:
         ret = self.embeddings(
             torch.tensor(x, dtype=torch.long).to(xnmt.device))
         if self.fix_norm is not None:
             ret = torch.div(ret, torch.norm(ret, dim=1).unsqueeze(1))
             if self.fix_norm != 1:
                  ret = torch.mul(ret, self.fix_norm)
         if self.train and self.word_id_mask and any(
                 x[i] in self.word_id_mask[i]
                 for i in range(x.batch_size())):
             dropout_mask = torch.tensor(
                 [[0.0] * self.emb_dim
                  if x[i] in self.word_id_mask[i] else [1.0] * self.emb_dim
                  for i in range(x.batch_size())],
                 device=xnmt.device)
             ret = torch.mul(ret, dropout_mask)
     if self.train and self.weight_noise > 0.0:
         noise = torch.autograd.Variable(
             ret.data.new(ret.size(), device=xnmt.device).normal_(
                 0.0, self.weight_noise))
         ret = ret + noise
     return ret
Example #13
  def generate(self, src, forced_trg_ids):
    assert not forced_trg_ids
    assert batchers.is_batched(src) and src.batch_size()==1, "batched generation not fully implemented"
    src = src[0]
    # Generating outputs
    outputs = []
    event_trigger.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src)
    encodings = self.encoder.transduce(embeddings)
    if self.mode in ["avg_mlp", "final_mlp"]:
      if self.generate_per_step:
        assert self.mode == "avg_mlp", "final_mlp not supported with generate_per_step=True"
        scores = [dy.logistic(self.output_layer.transform(enc_i)) for enc_i in encodings]
      else:
        if self.mode == "avg_mlp":
          encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) * (1.0 / encodings.dim()[0][1])
        elif self.mode == "final_mlp":
          encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
        scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
    elif self.mode == "lin_sum_sig":
      enc_lin = []
      for step_i, enc_i in enumerate(encodings):
        step_linear = self.output_layer.transform(enc_i)
        if encodings.mask and np.sum(encodings.mask.np_arr[:, step_i]) > 0:
          step_linear = dy.cmult(step_linear, dy.inputTensor(1.0 - encodings.mask.np_arr[:, step_i], batched=True))
        enc_lin.append(step_linear)
      if self.generate_per_step:
        scores = [dy.logistic(enc_i) for enc_i in enc_lin]
      else:
        if encodings.mask:
          encoding_fixed_size = dy.cdiv(dy.esum(enc_lin),
                                        dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
        else:
          encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
        scores = dy.logistic(encoding_fixed_size)
    else:
      raise ValueError(f"unknown mode '{self.mode}'")

    if self.generate_per_step:
      output_actions = [np.argmax(score_i.npvalue()) for score_i in scores]
      score = np.sum([np.max(score_i.npvalue()) for score_i in scores])
      outputs.append(sent.SimpleSentence(words=output_actions,
                                         idx=src.idx,
                                         vocab=getattr(self.trg_reader, "vocab", None),
                                         score=score,
                                         output_procs=self.trg_reader.output_procs))
    else:
      scores_arr = scores.npvalue()
      output_actions = list(np.nonzero(scores_arr > 0.5)[0])
      score = np.sum(scores_arr[scores_arr > 0.5])
      outputs.append(sent.SimpleSentence(words=output_actions,
                                         idx=src.idx,
                                         vocab=getattr(self.trg_reader, "vocab", None),
                                         score=score,
                                         output_procs=self.trg_reader.output_procs))
    return outputs
Example #14
  def calc_nll(self, src, trg):
    assert batchers.is_batched(src) and batchers.is_batched(trg)
    batch_size, encodings, outputs, seq_len = self._encode_src(src)

    if trg.sent_len() != seq_len:
      if self.auto_cut_pad:
        trg = self._cut_or_pad_targets(seq_len, trg)
      else:
        raise ValueError(f"src/trg lengths do not match: {seq_len} != {trg.sent_len()}")

    ref_action = np.asarray([trg_sent.words for trg_sent in trg]).reshape((seq_len * batch_size,))
    loss_expr_perstep = self.scorer.calc_loss(outputs, batchers.mark_as_batch(ref_action))
    # loss_expr_perstep = dy.pickneglogsoftmax_batch(outputs, ref_action)
    loss_expr_perstep = dy.reshape(loss_expr_perstep, (seq_len,), batch_size=batch_size)
    if trg.mask:
      loss_expr_perstep = dy.cmult(loss_expr_perstep, dy.inputTensor(1.0-trg.mask.np_arr.T, batched=True))
    loss_expr = dy.sum_elems(loss_expr_perstep)

    return loss_expr
Example #15
    def calc_nll(self, src: Union[batchers.Batch, sent.Sentence], trg: Union[batchers.Batch, sent.Sentence]) \
            -> LossExpr:
        if batchers.is_batched(trg):
            units = [t.len_unpadded() for t in trg]
            ids = batchers.ListBatch([t.value for t in trg])
        else:
            units = trg.len_unpadded()
            ids = trg.value

        h = self._encode_src(src)
        loss_expr = self.scorer.calc_loss(h, ids)
        return LossExpr(loss_expr, units)
Example #16
  def calc_loss(self, x: dy.Expression, y: Union[numbers.Integral, List[numbers.Integral]]) -> dy.Expression:
    if self.can_loss_be_derived_from_scores():
      scores = self.calc_scores(x)
      # single mode
      if not batchers.is_batched(y):
        loss = dy.pickneglogsoftmax(scores, y)
      # minibatch mode
      else:
        loss = dy.pickneglogsoftmax_batch(scores, y)
    else:
      log_prob = self.calc_log_probs(x)
      if not batchers.is_batched(y):
        loss = -dy.pick(log_prob, y)
      else:
        loss = -dy.pick_batch(log_prob, y)

      if self.label_smoothing > 0:
        ls_loss = -dy.mean_elems(log_prob)
        loss = ((1 - self.label_smoothing) * loss) + (self.label_smoothing * ls_loss)
    
    return loss
Example #17
 def embed(self, x: Union[batchers.Batch,
                          numbers.Integral]) -> dy.Expression:
     if self.train and self.word_dropout > 0.0 and self.word_id_mask is None:
         batch_size = x.batch_size() if batchers.is_batched(x) else 1
         self.word_id_mask = [
             set(
                 np.random.choice(self.vocab_size,
                                  int(self.vocab_size * self.word_dropout),
                                  replace=False)) for _ in range(batch_size)
         ]
     emb_e = dy.parameter(self.embeddings)
     # single mode
     if not batchers.is_batched(x):
         if self.train and self.word_id_mask and x in self.word_id_mask[0]:
             ret = dy.zeros((self.emb_dim, ))
         else:
             ret = dy.pick(emb_e, index=x)
             if self.fix_norm is not None:
                 ret = dy.cdiv(ret, dy.l2_norm(ret))
                 if self.fix_norm != 1:
                     ret *= self.fix_norm
     # minibatch mode
     else:
         ret = dy.pick_batch(emb_e, x)
         if self.fix_norm is not None:
             ret = dy.cdiv(ret, dy.l2_norm(ret))
             if self.fix_norm != 1:
                 ret *= self.fix_norm
         if self.train and self.word_id_mask and any(
                 x[i] in self.word_id_mask[i]
                 for i in range(x.batch_size())):
             dropout_mask = dy.inputTensor(np.transpose(
                 [[0.0] *
                  self.emb_dim if x[i] in self.word_id_mask[i] else [1.0] *
                  self.emb_dim for i in range(x.batch_size())]),
                                           batched=True)
             ret = dy.cmult(ret, dropout_mask)
     if self.train and self.weight_noise > 0.0:
         ret = dy.noise(ret, self.weight_noise)
     return ret
Example #18
    def calc_nll(self, src, trg):
        event_trigger.start_sent(src)
        if isinstance(src, batchers.CompoundBatch):
            src, _ = src.batches
        initial_state = self._encode_src(src)

        dec_state = initial_state
        trg_mask = trg.mask if batchers.is_batched(trg) else None
        losses = []
        seq_len = trg.sent_len()
        if batchers.is_batched(src):
            for j, single_trg in enumerate(trg):
                assert single_trg.sent_len() == seq_len  # assert consistent length
                assert 1 == len([
                    i for i in range(seq_len)
                    if (trg_mask is None or trg_mask.np_arr[j, i] == 0)
                    and single_trg[i] == vocabs.Vocab.ES
                ])  # assert exactly one unmasked ES token
        prev_ref_word = None
        for i in range(seq_len):
            if not batchers.is_batched(trg):
                ref_word = trg[i]
            else:
                ref_word = batchers.mark_as_batch(
                    [single_trg[i] for single_trg in trg])
            word_loss = self.calc_loss_one_step(
                dec_state=dec_state,
                batch_size=ref_word.batch_size(),
                ref_action=ref_word,
                prev_ref_action=prev_ref_word,
                mode=self.mode_translate)
            if batchers.is_batched(src) and trg_mask is not None:
                word_loss = trg_mask.cmult_by_timestep_expr(word_loss,
                                                            i,
                                                            inverse=True)
            losses.append(word_loss)
            prev_ref_word = ref_word

        return dy.esum(losses)
Example #19
    def calc_nll(self, src: Union[batchers.Batch, sent.Sentence], trg: Union[batchers.Batch, sent.Sentence]) \
            -> tt.Tensor:
        assert batchers.is_batched(src) and batchers.is_batched(trg)
        batch_size, encodings, outputs, seq_len = self._encode_src(src)

        if trg.sent_len() != seq_len:
            if self.auto_cut_pad:
                trg = self._cut_or_pad_targets(seq_len, trg)
            else:
                raise ValueError(
                    f"src/trg lengths do not match: {seq_len} != {trg.sent_len()}")

        ref_action = np.asarray([trg_sent.words for trg_sent in trg]).reshape(
            (seq_len * batch_size, ))
        loss_expr_perstep = self.scorer.calc_loss(
            outputs, batchers.mark_as_batch(ref_action))
        loss_expr_perstep = tt.unmerge_time_batch_dims(loss_expr_perstep,
                                                       batch_size)
        loss_expr = tt.aggregate_masked_loss(loss_expr_perstep, trg.mask)

        return loss_expr
Example #20
  def calc_nll(self, src: Union[batchers.Batch, sent.Sentence], trg: Union[batchers.Batch, sent.Sentence]) -> dy.Expression:
    event_trigger.start_sent(src)
#    if isinstance(src, batchers.CompoundBatch): src = src.batches[0]
    # Encode the sentence
    initial_state = self._encode_src(src)

    dec_state = initial_state
    trg_mask = trg.mask if batchers.is_batched(trg) else None
    losses = []
    seq_len = trg.sent_len()

    if settings.CHECK_VALIDITY and batchers.is_batched(src):
      for j, single_trg in enumerate(trg):
        assert single_trg.sent_len() == seq_len # assert consistent length
        assert 1==len([i for i in range(seq_len) if (trg_mask is None or trg_mask.np_arr[j,i]==0) and single_trg[i]==Vocab.ES]) # assert exactly one unmasked ES token

    input_word = None
    for i in range(seq_len):
      ref_word = DefaultTranslator._select_ref_words(trg, i, truncate_masked=self.truncate_dec_batches)
      if self.truncate_dec_batches and batchers.is_batched(ref_word):
        dec_state.rnn_state, ref_word = batchers.truncate_batches(dec_state.rnn_state, ref_word)

      if input_word is not None:
        dec_state = self.decoder.add_input(dec_state, self.trg_embedder.embed(input_word))
      rnn_output = dec_state.rnn_state.output()
      dec_state.context = self.attender.calc_context(rnn_output)
      word_loss = self.decoder.calc_loss(dec_state, ref_word)

      if not self.truncate_dec_batches and batchers.is_batched(src) and trg_mask is not None:
        word_loss = trg_mask.cmult_by_timestep_expr(word_loss, i, inverse=True)
      losses.append(word_loss)
      input_word = ref_word

    if self.truncate_dec_batches:
      loss_expr = dy.esum([dy.sum_batches(wl) for wl in losses])
    else:
      loss_expr = dy.esum(losses)
    return loss_expr
Example #21
 def embed_factor(self, x):
     if self.train and self.word_dropout > 0.0 and self.word_id_mask is None:
         batch_size = x.batch_size() if batchers.is_batched(x) else 1
         self.word_id_mask = [
             set(
                 np.random.choice(self.vocab_size,
                                  int(self.vocab_size * self.word_dropout),
                                  replace=False)) for _ in range(batch_size)
         ]
     # single mode
     if not batchers.is_batched(x):
         if self.train and self.word_id_mask and x in self.word_id_mask[0]:
             ret = dy.zeros((self.fact_emb_dim, ))
         else:
             ret = self.embeddings[x]
             if self.fix_norm is not None:
                 ret = dy.cdiv(ret, dy.l2_norm(ret))
                 if self.fix_norm != 1:
                     ret *= self.fix_norm
     # minibatch mode
     else:
         ret = self.embeddings.batch(x)
         if self.fix_norm is not None:
             ret = dy.cdiv(ret, dy.l2_norm(ret))
             if self.fix_norm != 1:
                 ret *= self.fix_norm
         if self.train and self.word_id_mask and any(
                 x[i] in self.word_id_mask[i]
                 for i in range(x.batch_size())):
             dropout_mask = dy.inputTensor(np.transpose(
                 [[0.0] * self.fact_emb_dim if x[i] in self.word_id_mask[i]
                  else [1.0] * self.fact_emb_dim
                  for i in range(x.batch_size())]),
                                           batched=True)
             ret = dy.cmult(ret, dropout_mask)
     if self.train and self.weight_noise > 0.0:
         ret = dy.noise(ret, self.weight_noise)
     return ret
Example #22
  def embed_sent(self, x: Any) -> expression_seqs.ExpressionSequence:
    """Embed a full sentence worth of words. By default, just do a for loop.

    Args:
      x: This will generally be a list of word IDs, but could also be a list of strings or some other format.
         It could also be batched, in which case it will be a (possibly masked) :class:`xnmt.batcher.Batch` object

    Returns:
      An expression sequence representing vectors of each word in the input.
    """
    # single mode
    if not batchers.is_batched(x):
      embeddings = [self.embed(word) for word in x]
    # minibatch mode
    else:
      embeddings = []
      seq_len = x.sent_len()
      for single_sent in x: assert single_sent.sent_len()==seq_len
      for word_i in range(seq_len):
        batch = batchers.mark_as_batch([single_sent[word_i] for single_sent in x])
        embeddings.append(self.embed(batch))

    return expression_seqs.ExpressionSequence(expr_list=embeddings, mask=x.mask if batchers.is_batched(x) else None)
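The docstring above describes the single-mode / minibatch-mode split that recurs throughout these examples: an unbatched sentence is embedded word by word, while a batched input is regrouped per time step and embedded as one batched lookup. Below is a minimal, self-contained sketch of that dispatch pattern, using a hypothetical ListBatch stand-in and a plain callable rather than the actual xnmt batchers module:

from typing import Callable, List, Union

class ListBatch(list):
    """Hypothetical stand-in for a batch: a list of equal-length (padded) sentences."""

def is_batched(x) -> bool:
    # Plays the role of batchers.is_batched in the examples above (assumption).
    return isinstance(x, ListBatch)

def embed_sent(embed_fn: Callable, x: Union[List[int], ListBatch]) -> list:
    if not is_batched(x):
        # single mode: embed one word at a time
        return [embed_fn(word) for word in x]
    # minibatch mode: all sentences must share the same padded length;
    # each time step becomes one batched embedding lookup
    seq_len = len(x[0])
    assert all(len(s) == seq_len for s in x)
    return [embed_fn(ListBatch(s[i] for s in x)) for i in range(seq_len)]

# usage: embed_sent(lambda w: ("emb", w), ListBatch([[1, 2, 3], [4, 5, 6]]))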
Example #23
    def calc_loss(self, x: dy.Expression,
                  y: Union[int, List[int]]) -> dy.Expression:

        scores = self.calc_scores(x)

        if self.label_smoothing == 0.0:
            # single mode
            if not batchers.is_batched(y):
                loss = dy.pickneglogsoftmax(scores, y)
            # minibatch mode
            else:
                loss = dy.pickneglogsoftmax_batch(scores, y)
        else:
            log_prob = dy.log_softmax(scores)
            if not batchers.is_batched(y):
                pre_loss = -dy.pick(log_prob, y)
            else:
                pre_loss = -dy.pick_batch(log_prob, y)

            ls_loss = -dy.mean_elems(log_prob)
            loss = ((1 - self.label_smoothing) *
                    pre_loss) + (self.label_smoothing * ls_loss)

        return loss
Example #24
 def embed(self, x: Union[batchers.Batch,
                          numbers.Integral]) -> dy.Expression:
     """
      Embed a single word in a sentence.

      Args:
        x: A word id, or a batch of word ids.

      Returns:
        Embedded word.
      """
     ret = self._embed_word(x, batchers.is_batched(x))
      # Apply fix_norm: normalize to unit length, then scale
     if self.fix_norm is not None:
         ret = dy.cdiv(ret, dy.l2_norm(ret)) * self.fix_norm
      # Add weight noise only when training
     if self.train and self.weight_noise > 0.0:
         ret = dy.noise(ret, self.weight_noise)
     return ret
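The fix_norm branch above rescales each embedding so that its L2 norm equals fix_norm: divide by the current norm, then multiply by the target value. A tiny numpy illustration of just that arithmetic; purely illustrative, not xnmt or DyNet code:

import numpy as np

def apply_fix_norm(vec: np.ndarray, fix_norm: float) -> np.ndarray:
    # Same arithmetic as dy.cdiv(ret, dy.l2_norm(ret)) * self.fix_norm above.
    return vec / np.linalg.norm(vec) * fix_norm

v = np.array([3.0, 4.0])                        # ||v|| == 5.0
print(np.linalg.norm(apply_fix_norm(v, 2.0)))   # -> 2.0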
Example #25
 def __exit__(self, et, ev, traceback):
     if et is not None:  # exception occurred
         logger.error("------ Error Report ------")
         for key, val in self.args.items():
             logger.error(f"*** {key} ***")
             if callable(val):
                 val()
             elif batchers.is_batched(val):
                 for batch_sent in val:
                     if hasattr(batch_sent, "idx"):
                         logger.error("{:>10}. {}".format(batch_sent.idx,
                                                          str(batch_sent)[:100]))
                     else:
                         logger.error(str(batch_sent))
             else:
                 logger.error(str(val))
Example #26
 def generate_output(self, translator, initial_state,
                     src_length=None, forced_trg_ids=None):
   # Output variables
   score = []
   word_ids = []
   attentions = []
   logsoftmaxes = []
   states = []
   masks = []
   # Search Variables
   done = None
   current_state = initial_state
   for length in range(self.max_len):
     prev_word = word_ids[length-1] if length > 0 else None
     current_output = translator.generate_one_step(prev_word, current_state)
     current_state = current_output.state
     if forced_trg_ids is None:
       word_id = np.argmax(current_output.logsoftmax.npvalue(), axis=0)
       if len(word_id.shape) == 2:
         word_id = word_id[0]
     else:
       if batchers.is_batched(forced_trg_ids):
         word_id = [forced_trg_ids[i][length] for i in range(len(forced_trg_ids))]
       else:
         word_id = [forced_trg_ids[length]]
     logsoft = dy.pick_batch(current_output.logsoftmax, word_id)
     if done is not None:
       word_id = [word_id[i] if not done[i] else Vocab.ES for i in range(len(done))]
       # masking for logsoftmax
       mask = [1 if not done[i] else 0 for i in range(len(done))]
       logsoft = dy.cmult(logsoft, dy.inputTensor(mask, batched=True))
       masks.append(mask)
     # Packing outputs
     score.append(logsoft.npvalue())
     word_ids.append(word_id)
     attentions.append(current_output.attention)
     logsoftmaxes.append(dy.pick_batch(current_output.logsoftmax, word_id))
     states.append(translator.get_nobp_state(current_state))
     # Check if we are done.
     done = [x == Vocab.ES for x in word_id]
     if all(done):
       break
   masks.insert(0, [1 for _ in range(len(done))])
   words = np.stack(word_ids, axis=1)
   score = np.sum(score, axis=0)
   return [SearchOutput(words, attentions, score, logsoftmaxes, states, masks)]
Example #27
 def embed_speech_sent(self, x):
     # TODO refactor: seems a bit too many special cases that need to be distinguished
     #    x = x.batches[0]
     batched = batchers.is_batched(x)
     first_sent = x[0] if batched else x
     if hasattr(first_sent, "get_array"):
         if not batched:
             return expression_seqs.LazyNumpyExpressionSequence(
                 lazy_data=x.get_array())
         else:
             return expression_seqs.LazyNumpyExpressionSequence(
                 lazy_data=batchers.mark_as_batch([s for s in x]),
                 mask=x.mask)
     else:
         raise ValueError("!! Expected to use above")
Example #28
    def embed_sent(self, x: Any) -> expression_seqs.ExpressionSequence:
        """Embed a full sentence worth of words. By default, just do a for loop.

        Args:
          x: This will generally be a list of word IDs, but could also be a list of strings or some other format.
             It could also be batched, in which case it will be a (possibly masked) :class:`xnmt.batcher.Batch` object

        Returns:
          An expression sequence representing vectors of each word in the input.
        """
        # single mode
        if not batchers.is_batched(x):
            expr = expression_seqs.ExpressionSequence(
                expr_list=[self.embed(word) for word in x])
        # minibatch mode
        elif type(self) == LookupEmbedder:
            embeddings = []
            for word_i in range(x.sent_len()):
                batch = batchers.mark_as_batch(
                    [single_sent[word_i] for single_sent in x])
                embeddings.append(self.embed(batch))
            expr = expression_seqs.ExpressionSequence(expr_list=embeddings,
                                                      mask=x.mask)
        else:
            assert type(x[0]) == sent.SegmentedSentence, \
                "Need to use CharFromWordTextReader for non-standard embeddings."
            embeddings = []
            all_embeddings = []
            for sentence in x:
                embedding = []
                for i in range(sentence.len_unpadded()):
                    embed_word = self.embed(sentence.words[i])
                    embedding.append(embed_word)
                    all_embeddings.append(embed_word)
                embeddings.append(embedding)
            # Useful when using dy.autobatch
            dy.forward(all_embeddings)
            all_embeddings.clear()
            # Pad the results
            expr = batchers.pad_embedding(embeddings)

        return expr
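The final branch above embeds every unpadded word of each SegmentedSentence, forces evaluation with dy.forward (useful under autobatching), and then pads the per-sentence embedding lists to a common length. A hedged numpy sketch of that padding step; the helper below illustrates the idea and is not the actual batchers.pad_embedding:

import numpy as np

def pad_embedding_lists(sent_embs, emb_dim):
    # sent_embs: one list of embedding vectors per sentence; lengths may differ.
    # Pad with zero vectors up to the longest sentence and return a 0/1 mask
    # where 1 marks padded positions (matching the mask convention used above).
    max_len = max(len(e) for e in sent_embs)
    padded = np.zeros((len(sent_embs), max_len, emb_dim))
    mask = np.ones((len(sent_embs), max_len), dtype=int)
    for i, emb in enumerate(sent_embs):
        padded[i, :len(emb)] = emb
        mask[i, :len(emb)] = 0
    return padded, mask

# usage: pad_embedding_lists([[np.ones(4)] * 3, [np.ones(4)] * 2], emb_dim=4)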
Example #29
 def embed_sent(self, x: sent.Sentence) -> expression_seqs.ExpressionSequence:
   # TODO refactor: seems a bit too many special cases that need to be distinguished
   batched = batchers.is_batched(x)
   first_sent = x[0] if batched else x
   if hasattr(first_sent, "get_array"):
     if not batched:
       return expression_seqs.LazyNumpyExpressionSequence(lazy_data=x.get_array())
     else:
       return expression_seqs.LazyNumpyExpressionSequence(lazy_data=batchers.mark_as_batch(
                                          [s for s in x]),
                                          mask=x.mask)
   else:
     if not batched:
       embeddings = [self.embed(word) for word in x]
     else:
       embeddings = []
       for word_i in range(x.sent_len()):
         embeddings.append(self.embed(batchers.mark_as_batch([single_sent[word_i] for single_sent in x])))
     return expression_seqs.ExpressionSequence(expr_list=embeddings, mask=x.mask)
Example #30
  def generate(self,
               src: Union[batchers.Batch, sent.Sentence],
               normalize_scores: bool = False):
    if not batchers.is_batched(src):
      src = batchers.mark_as_batch([src])
    h = self._encode_src(src)
    best_words, best_scores = self.scorer.best_k(h, k=1, normalize_scores=normalize_scores)
    assert best_words.shape == (1, src.batch_size())
    assert best_scores.shape == (1, src.batch_size())

    outputs = []
    for batch_i in range(src.batch_size()):
      if src.batch_size() > 1:
        word = best_words[0, batch_i]
        score = best_scores[0, batch_i]
      else:
        word = best_words[0]
        score = best_scores[0]
      outputs.append(sent.ScalarSentence(value=word, score=score))
    return outputs