Ejemplo n.º 1
0
  def transduce(self, es):
    """
    Run the input through the stacked layers, combining each later layer's
    output with its own input via ``self._sum_lists``.

    The final state of every layer application is collected in
    ``self._final_states``.

    Args:
      es: the input sequence handed to the first layer

    Returns:
      the output of the only layer if there is just one; otherwise the output
      of the last layer, combined with its input when ``self.add_to_output``
      is set
    """
    bottom_layer = self.builder_layers[0]
    es = bottom_layer(es)
    self._final_states = [bottom_layer.get_final_states()[0]]

    # A single-layer stack has nothing to add residual connections to.
    if len(self.builder_layers) == 1:
      return es

    for layer in self.builder_layers[1:]:
      combined = self._sum_lists(layer(es), es)
      es = ExpressionSequence(expr_list=combined)
      state = FinalTransducerState(es[-1], layer.get_final_states()[0].cell_expr())
      self._final_states.append(state)

    # NOTE(review): the loop above already ran builder_layers[-1]; it is applied
    # a second time here. Confirm whether the loop was meant to stop one earlier.
    top_layer = self.builder_layers[-1]
    last_output = top_layer(es)

    if not self.add_to_output:
      self._final_states.append(top_layer.get_final_states()[0])
      return last_output

    top_state = FinalTransducerState(last_output[-1], top_layer.get_final_states()[0].cell_expr())
    self._final_states.append(top_state)
    return ExpressionSequence(expr_list=self._sum_lists(last_output, es))
Ejemplo n.º 2
0
    def transduce(self, embed_sent):
        """
        Run the embedded sentence through the stack of convolutions.

        The input tensor is zero-padded on both sides along dimension 1, passed
        through a first convolution, then through every (kernel, bias) pair in
        ``self.builder_layers``, and finally through a last convolution. The
        configured non-linearity is applied after every convolution except the
        last one.

        Args:
          embed_sent: embedded input sequence; only its ``as_tensor()`` is used

        Returns:
          ExpressionSequence wrapping the output of the last convolution. Its
          last element is also stored in ``self._final_states``.
        """
        src = embed_sent.as_tensor()

        sent_len = src.dim()[0][1]
        batch_size = src.dim()[1]
        # Floor division: pad_size is used as a tensor dimension below, and "/"
        # would produce a float under Python 3.
        # TODO adapt it also for even window size
        pad_size = (self.window_receptor - 1) // 2

        def activate(expr):
            # Strings are compared by value; identity ("is") only matches when
            # both strings happen to be interned.
            if self.non_linearity == 'tanh':
                return dy.tanh(expr)
            if self.non_linearity == 'relu':
                return dy.rectify(expr)
            if self.non_linearity == 'sigmoid':
                return dy.logistic(expr)
            # 'linear' and any unrecognised name leave the activations untouched.
            return expr

        src = dy.concatenate([
            dy.zeroes((self.input_dim, pad_size), batch_size=batch_size), src,
            dy.zeroes((self.input_dim, pad_size), batch_size=batch_size)
        ],
                             d=1)
        padded_sent_len = sent_len + 2 * pad_size

        conv1 = dy.parameter(self.pConv1)
        bias1 = dy.parameter(self.pBias1)
        src_chn = dy.reshape(src, (self.input_dim, padded_sent_len, 1),
                             batch_size=batch_size)
        cnn_layer1 = dy.conv2d_bias(src_chn, conv1, bias1, stride=[1, 1])

        hidden_shape = (self.internal_dim, sent_len, 1)
        hidden_layer = activate(
            dy.reshape(cnn_layer1, hidden_shape, batch_size=batch_size))

        for conv_hid, bias_hid in self.builder_layers:
            hidden_layer = dy.conv2d_bias(hidden_layer,
                                          dy.parameter(conv_hid),
                                          dy.parameter(bias_hid),
                                          stride=[1, 1])
            hidden_layer = activate(
                dy.reshape(hidden_layer, hidden_shape, batch_size=batch_size))

        last_conv = dy.parameter(self.last_conv)
        last_bias = dy.parameter(self.last_bias)
        output = dy.conv2d_bias(hidden_layer,
                                last_conv,
                                last_bias,
                                stride=[1, 1])
        output = dy.reshape(output, (sent_len, self.output_dim),
                            batch_size=batch_size)
        output_seq = ExpressionSequence(expr_tensor=output)
        self._final_states = [FinalTransducerState(output_seq[-1])]
        return output_seq
Ejemplo n.º 3
0
    def __call__(self, es):
        """
        Run the sequence through paired forward and backward layers.

        The first layer pair reads the input (the backward layer reads it
        reversed). Every deeper forward layer reads the previous forward output
        together with the re-reversed previous backward output; every deeper
        backward layer reads the reversed previous forward output together with
        the previous backward output.

        Args:
          es: input expression sequence; its mask is attached to the result

        Returns:
          ExpressionSequence whose i-th element concatenates the i-th forward
          output with the matching backward output
        """
        mask = es.mask
        layer_count = len(self.forward_layers)

        # first layer
        forward_es = self.forward_layers[0](es)
        rev_backward_es = self.backward_layers[0](
            ReversedExpressionSequence(es))

        for depth in range(1, layer_count):
            fwd_inputs = [forward_es, ReversedExpressionSequence(rev_backward_es)]
            next_forward_es = self.forward_layers[depth](fwd_inputs)
            bwd_inputs = [ReversedExpressionSequence(forward_es), rev_backward_es]
            bwd_output = self.backward_layers[depth](bwd_inputs)
            rev_backward_es = ExpressionSequence(bwd_output.as_list(), mask=mask)
            forward_es = next_forward_es

        # One final state per layer pair: forward and backward parts concatenated.
        final_states = []
        for depth in range(layer_count):
            fwd_state = self.forward_layers[depth].get_final_states()[0]
            bwd_state = self.backward_layers[depth].get_final_states()[0]
            main = dy.concatenate([fwd_state.main_expr(), bwd_state.main_expr()])
            cell = dy.concatenate([fwd_state.cell_expr(), bwd_state.cell_expr()])
            final_states.append(FinalTransducerState(main, cell))
        self._final_states = final_states

        # The backward outputs are stored reversed, hence the index from the end.
        merged = []
        for i in range(len(forward_es)):
            merged.append(dy.concatenate([forward_es[i], rev_backward_es[-i - 1]]))
        return ExpressionSequence(expr_list=merged, mask=mask)
Ejemplo n.º 4
0
    def __call__(self, expr_seq):
        """
        Transduce the sequence, applying masks if given (masked timesteps simply
        copy the previous h / c).

        Args:
          expr_seq: expression sequence or list of expression sequences (where
            each inner list will be concatenated)
        Returns:
          expression sequence holding the top layer's hidden states, carrying
          the mask of the first input sequence
        """
        if isinstance(expr_seq, ExpressionSequence):
            expr_seq = [expr_seq]
        batch_size = expr_seq[0][0].dim()[1]
        seq_len = len(expr_seq[0])
        mask = expr_seq[0].mask

        use_dropout = self.dropout_rate > 0.0 and self.train
        weightnoise_std = self.weightnoise_std if self.train else 0.0

        if use_dropout:
            self.set_dropout_masks(batch_size=batch_size)

        cur_input = expr_seq
        self._final_states = []
        for layer_i in range(self.num_layers):
            # Index 0 holds the zero initial state; real timesteps start at 1.
            h = [dy.zeroes(dim=(self.hidden_dim, ), batch_size=batch_size)]
            c = [dy.zeroes(dim=(self.hidden_dim, ), batch_size=batch_size)]
            for pos_i in range(seq_len):
                # One input expression per input sequence at this timestep.
                x_t = [seq[pos_i] for seq in cur_input]
                if use_dropout:
                    # apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights)
                    gates_t = dy.vanilla_lstm_gates_dropout_concat(
                        x_t, h[-1], self.Wx[layer_i], self.Wh[layer_i],
                        self.b[layer_i], self.dropout_mask_x[layer_i],
                        self.dropout_mask_h[layer_i], weightnoise_std)
                else:
                    gates_t = dy.vanilla_lstm_gates_concat(
                        x_t, h[-1], self.Wx[layer_i], self.Wh[layer_i],
                        self.b[layer_i], weightnoise_std)
                c_t = dy.vanilla_lstm_c(c[-1], gates_t)
                h_t = dy.vanilla_lstm_h(c_t, gates_t)
                if mask is None or np.isclose(
                        np.sum(mask.np_arr[:, pos_i:pos_i + 1]), 0.0):
                    # Nothing is masked at this timestep: take the new state as is.
                    c.append(c_t)
                    h.append(h_t)
                else:
                    # Blend the new state with the previous one according to the mask.
                    c.append(
                        mask.cmult_by_timestep_expr(c_t, pos_i, True) +
                        mask.cmult_by_timestep_expr(c[-1], pos_i, False))
                    h.append(
                        mask.cmult_by_timestep_expr(h_t, pos_i, True) +
                        mask.cmult_by_timestep_expr(h[-1], pos_i, False))
            self._final_states.append(FinalTransducerState(h[-1], c[-1]))
            # The next layer reads this layer's hidden states (without the initial zero state).
            cur_input = [h[1:]]

        return ExpressionSequence(expr_list=h[1:], mask=mask)
Ejemplo n.º 5
0
    def transduce(self, expr_seq: ExpressionSequence) -> ExpressionSequence:
        """
        Apply multi-head self-attention to the sequence.

        Args:
          expr_seq: input expression sequence; if it has a mask, the mask scaled
            by -1e10 is added to the attention scores
        Returns:
          expression sequence built from the attention output, carrying the
          input's mask
        """
        Wq = dy.parameter(self.pWq)
        Wk = dy.parameter(self.pWk)
        Wv = dy.parameter(self.pWv)
        Wo = dy.parameter(self.pWo)
        bq = dy.parameter(self.pbq)
        bk = dy.parameter(self.pbk)
        bv = dy.parameter(self.pbv)
        bo = dy.parameter(self.pbo)

        # Start with a [(length, model_size) x batch] tensor
        inp = expr_seq.as_transposed_tensor()
        seq_len = inp.dim()[0][0]
        batch = inp.dim()[1]

        # Query, key and value projections.
        # TODO: do we need bias broadcasting in DyNet?
        projections = [
            bias + inp * weight
            for bias, weight in ((bq, Wq), (bk, Wk), (bv, Wv))
        ]

        # Split to batches [(length, head_dim) x batch * num_heads] tensor
        head_shape = (seq_len, self.head_dim)
        head_batch = batch * self.num_heads
        q, k, v = (dy.reshape(proj, head_shape, batch_size=head_batch)
                   for proj in projections)

        # Do scaled dot product [(length, length) x batch * num_heads], rows are queries, columns are keys
        attn_score = q * dy.transpose(k) / sqrt(self.head_dim)
        if expr_seq.mask is not None:
            repeated = np.repeat(expr_seq.mask.np_arr, self.num_heads,
                                 axis=0).transpose()
            attn_score = attn_score + dy.inputTensor(repeated,
                                                     batched=True) * -1e10
        attn_prob = dy.softmax(attn_score, d=1)

        # Reduce using attention and resize to match [(length, model_size) x batch]
        attended = dy.reshape(attn_prob * v, (seq_len, self.input_dim),
                              batch_size=batch)
        # Final transformation
        attended = bo + attended * Wo

        expr_seq = ExpressionSequence(expr_transposed_tensor=attended,
                                      mask=expr_seq.mask)
        self._final_states = [FinalTransducerState(expr_seq[-1], None)]
        return expr_seq
Ejemplo n.º 6
0
  def transduce(self, es):
    """
    Run a forward and a backward layer over the input, concatenate their
    outputs position by position and feed the result to the residual network.

    ``self._final_states`` holds the concatenated forward/backward final state
    followed by the final states of the residual network.

    Args:
      es: the input expression sequence

    Returns:
      the output of ``self.residual_network.transduce``
    """
    forward_e = self.forward_layer(es)
    backward_e = self.backward_layer(ReversedExpressionSequence(es))

    fwd_state = self.forward_layer.get_final_states()[0]
    bwd_state = self.backward_layer.get_final_states()[0]
    main = dy.concatenate([fwd_state.main_expr(), bwd_state.main_expr()])
    cell = dy.concatenate([fwd_state.cell_expr(), bwd_state.cell_expr()])
    self._final_states = [FinalTransducerState(main, cell)]

    # The backward output is re-reversed so that positions line up with the forward output.
    joined = []
    for f, b in zip(forward_e, ReversedExpressionSequence(backward_e)):
      joined.append(dy.concatenate([f, b]))

    output = self.residual_network.transduce(ExpressionSequence(expr_list=joined))
    self._final_states += self.residual_network.get_final_states()
    return output
Ejemplo n.º 7
0
 def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
   """
   Combine the input sequence with the position embeddings.

   The first ``len(src)`` columns of ``self.embedder`` are selected and either
   added to the input tensor (``op == 'sum'``) or concatenated with it
   (``op == 'concat'``).

   Args:
     src: the input expression sequence; its mask is carried over to the output

   Returns:
     the combined expression sequence; its last element is also stored in
     ``self._final_states``

   Raises:
     ValueError: if ``self.op`` is neither ``'sum'`` nor ``'concat'``
   """
   # Validate first so a bad configuration fails before any graph node is built.
   # The message must read self.op: a bare `op` is undefined in this scope.
   if self.op not in ('sum', 'concat'):
     raise ValueError(f'Illegal op {self.op} in PositionalTransducer (options are "sum"/"concat")')
   sent_len = len(src)
   embeddings = dy.strided_select(dy.parameter(self.embedder), [1,1], [0,0], [self.input_dim, sent_len])
   if self.op == 'sum':
     output = embeddings + src.as_tensor()
   else:
     output = dy.concatenate([embeddings, src.as_tensor()])
   output_seq = ExpressionSequence(expr_tensor=output, mask=src.mask)
   self._final_states = [FinalTransducerState(output_seq[-1])]
   return output_seq
Ejemplo n.º 8
0
  def __call__(self, es):
    """
    Run the input through the stacked forward/backward layer pairs,
    downsampling between layers, and concatenate the forward and backward
    outputs of the last pair.

    :param es: an ExpressionSequence
    :returns: ExpressionSequence of the concatenated last-layer outputs
    """
    es_list = [es]
    top_layer_i = len(self.builder_layers) - 1

    for layer_i, (fb, bb) in enumerate(self.builder_layers):
      reduce_factor = self._reduce_factor_for_layer(layer_i)
      if self.downsampling_method=="concat" and len(es_list[0]) % reduce_factor != 0:
        raise ValueError("For 'concat' subsampling, sequence lengths must be multiples of the total reduce factor. Configure batcher accordingly.")
      fs = fb(es_list)
      bs = bb([ReversedExpressionSequence(es_item) for es_item in es_list])

      if layer_i >= top_layer_i:
        # concat final outputs
        ret_es = ExpressionSequence(expr_list=[dy.concatenate([f, b]) for f, b in zip(fs, ReversedExpressionSequence(bs))])
        continue

      if self.downsampling_method=="skip":
        # Keep every reduce_factor-th output; the backward list is flipped after subsampling.
        kept_fwd = fs[::reduce_factor]
        kept_bwd = bs[::reduce_factor][::-1]
        es_list = [ExpressionSequence(expr_list=kept_fwd), ExpressionSequence(expr_list=kept_bwd)]
      elif self.downsampling_method=="concat":
        es_len = len(es_list[0])
        # One bucket per offset inside a window; buckets only exist when there
        # is at least one window (an empty input leaves no buckets at all).
        bucket_count = reduce_factor if es_len > 0 else 0
        fwd_buckets = [[] for _ in range(bucket_count)]
        bwd_buckets = [[] for _ in range(bucket_count)]
        for start in range(0, es_len, reduce_factor):
          for offset in range(reduce_factor):
            fwd_buckets[offset].append(fs[start+offset])
            bwd_buckets[offset].append(bs[es_len-reduce_factor+offset-start])
        fwd_seqs = [ExpressionSequence(expr_list=fwd_buckets[j]) for j in range(reduce_factor)]
        bwd_seqs = [ExpressionSequence(expr_list=bwd_buckets[j]) for j in range(reduce_factor)]
        es_list = fwd_seqs + bwd_seqs
      else:
        raise RuntimeError("unknown downsampling_method %s" % self.downsampling_method)

    # One final state per layer pair: forward and backward parts concatenated.
    final_states = []
    for fwd_builder, bwd_builder in self.builder_layers:
      fwd_state = fwd_builder.get_final_states()[0]
      bwd_state = bwd_builder.get_final_states()[0]
      main = dy.concatenate([fwd_state.main_expr(), bwd_state.main_expr()])
      cell = dy.concatenate([fwd_state.cell_expr(), bwd_state.cell_expr()])
      final_states.append(FinalTransducerState(main, cell))
    self._final_states = final_states

    return ret_es
Ejemplo n.º 9
0
    def transduce(self, embed_sent):
        """
        Apply an affine transform followed by the configured non-linearity.

        Args:
          embed_sent: embedded input sequence; only its ``as_tensor()`` is used

        Returns:
          ExpressionSequence wrapping the activated output. Its last element is
          also stored in ``self._final_states``.
        """
        src = embed_sent.as_tensor()

        W = dy.parameter(self.pW)
        b = dy.parameter(self.pb)

        l1 = dy.affine_transform([b, W, src])
        # Strings are compared by value; identity ("is") only matches when both
        # strings happen to be interned.
        if self.nonlinearity == 'sigmoid':
            output = dy.logistic(l1)
        elif self.nonlinearity == 'tanh':
            # 2 * logistic(x) - 1 equals tanh(x / 2), not tanh(x); use the real op.
            output = dy.tanh(l1)
        elif self.nonlinearity == 'relu':
            output = dy.rectify(l1)
        else:
            # 'linear' and any unrecognised name pass the affine output through.
            output = l1
        output_seq = ExpressionSequence(expr_tensor=output)
        self._final_states = [FinalTransducerState(output_seq[-1])]
        return output_seq
Ejemplo n.º 10
0
    def __call__(self, es):
        """
        Run the input through the stacked forward/backward layer pairs,
        downsampling between layers, and concatenate the forward and backward
        outputs of the last pair.

        With the "concat" downsampling method, inputs whose length is not a
        multiple of the layer's reduce factor are extended with zero
        expressions until it is.

        :param es: an ExpressionSequence
        :returns: ExpressionSequence of the concatenated last-layer outputs
        """

        es_list = [es]
        # Cached zero expression used for padding; rebuilt when the required
        # first dimension changes.
        zero_pad = None
        batch_size = es_list[0][0].dim()[1]

        for layer_i, (fb, bb) in enumerate(self.builder_layers):
            reduce_factor = self._reduce_factor_for_layer(layer_i)
            # Append one zero step to every input sequence per iteration until
            # the length divides evenly by reduce_factor.
            while self.downsampling_method == "concat" and len(
                    es_list[0]) % reduce_factor != 0:
                for es_i in range(len(es_list)):
                    # NOTE(review): as_list() may return the sequence's own list,
                    # in which case the append below mutates it in place - confirm.
                    expr_list = es_list[es_i].as_list()
                    if zero_pad is None or zero_pad.dim(
                    )[0][0] != expr_list[0].dim()[0][0]:
                        zero_pad = dy.zeros(dim=expr_list[0].dim()[0][0],
                                            batch_size=batch_size)
                    expr_list.append(zero_pad)
                    es_list[es_i] = ExpressionSequence(expr_list=expr_list)
            # Forward builder reads the inputs as they are, backward builder
            # reads each of them reversed.
            fs = fb(es_list)
            bs = bb(
                [ReversedExpressionSequence(es_item) for es_item in es_list])
            if layer_i < len(self.builder_layers) - 1:
                if self.downsampling_method == "skip":
                    # Keep every reduce_factor-th output; the backward list is
                    # flipped after subsampling.
                    es_list = [
                        ExpressionSequence(expr_list=fs[::reduce_factor]),
                        ExpressionSequence(expr_list=bs[::reduce_factor][::-1])
                    ]
                elif self.downsampling_method == "concat":
                    es_len = len(es_list[0])
                    # es_list_fwd[j] / es_list_bwd[j] collect the outputs found
                    # at offset j inside each window of reduce_factor steps.
                    es_list_fwd = []
                    es_list_bwd = []
                    for i in range(0, es_len, reduce_factor):
                        for j in range(reduce_factor):
                            # The per-offset lists are created while handling
                            # the first window.
                            if i == 0:
                                es_list_fwd.append([])
                                es_list_bwd.append([])
                            es_list_fwd[j].append(fs[i + j])
                            # bs is indexed window by window starting from its end.
                            es_list_bwd[j].append(bs[len(es_list[0]) -
                                                     reduce_factor + j - i])
                    # The next layer receives 2 * reduce_factor input sequences:
                    # all forward ones followed by all backward ones.
                    es_list = [
                        ExpressionSequence(expr_list=es_list_fwd[j])
                        for j in range(reduce_factor)
                    ] + [
                        ExpressionSequence(expr_list=es_list_bwd[j])
                        for j in range(reduce_factor)
                    ]
                else:
                    raise RuntimeError("unknown downsampling_method %s" %
                                       self.downsampling_method)
            else:
                # concat final outputs
                ret_es = ExpressionSequence(expr_list=[
                    dy.concatenate([f, b])
                    for f, b in zip(fs, ReversedExpressionSequence(bs))
                ])

        # One final state per layer pair: forward and backward main / cell
        # expressions concatenated.
        self._final_states = [FinalTransducerState(dy.concatenate([fb.get_final_states()[0].main_expr(),
                                                                bb.get_final_states()[0].main_expr()]),
                                                dy.concatenate([fb.get_final_states()[0].cell_expr(),
                                                                bb.get_final_states()[0].cell_expr()])) \
                              for (fb, bb) in self.builder_layers]

        return ret_es
Ejemplo n.º 11
0
    def transduce(self, es: ExpressionSequence) -> ExpressionSequence:
        """
        Run the input through the stacked forward/backward layer pairs,
        downsampling outputs and masks between layers, and concatenate the
        forward and backward outputs of the last pair.

        Args:
          es: an ExpressionSequence
        Returns:
          ExpressionSequence of the concatenated last-layer outputs, carrying
          the mask computed for the last layer
        Raises:
          ValueError: with "concat" downsampling, if an input length is not a
            multiple of the layer's reduce factor
          RuntimeError: if the downsampling method is unknown
        """
        es_list = [es]

        for layer_i, (fb, bb) in enumerate(self.builder_layers):
            reduce_factor = self._reduce_factor_for_layer(layer_i)

            in_mask = es_list[0].mask
            mask_out = None if in_mask is None else in_mask.lin_subsampled(reduce_factor)

            if self.downsampling_method == "concat" and len(
                    es_list[0]) % reduce_factor != 0:
                raise ValueError(
                    f"For 'concat' subsampling, sequence lengths must be multiples of the total reduce factor, "
                    f"but got sequence length={len(es_list[0])} for reduce_factor={reduce_factor}. "
                    f"Set Batcher's pad_src_to_multiple argument accordingly.")

            fs = fb.transduce(es_list)
            reversed_inputs = [ReversedExpressionSequence(es_item) for es_item in es_list]
            bs = bb.transduce(reversed_inputs)

            is_top_layer = not (layer_i < len(self.builder_layers) - 1)
            if is_top_layer:
                # concat final outputs
                pairs = zip(fs, ReversedExpressionSequence(bs))
                ret_es = ExpressionSequence(
                    expr_list=[dy.concatenate([f, b]) for f, b in pairs],
                    mask=mask_out)
            elif self.downsampling_method == "skip":
                # Keep every reduce_factor-th output; the backward list is
                # flipped after subsampling.
                es_list = [
                    ExpressionSequence(expr_list=fs[::reduce_factor], mask=mask_out),
                    ExpressionSequence(expr_list=bs[::reduce_factor][::-1], mask=mask_out),
                ]
            elif self.downsampling_method == "concat":
                es_len = len(es_list[0])
                # One bucket per offset inside a window; buckets only exist when
                # there is at least one window.
                n_buckets = reduce_factor if es_len > 0 else 0
                fwd_buckets = [[] for _ in range(n_buckets)]
                bwd_buckets = [[] for _ in range(n_buckets)]
                for window_start in range(0, es_len, reduce_factor):
                    for offset in range(reduce_factor):
                        fwd_buckets[offset].append(fs[window_start + offset])
                        bwd_buckets[offset].append(
                            bs[es_len - reduce_factor + offset - window_start])
                next_inputs = []
                for j in range(reduce_factor):
                    next_inputs.append(ExpressionSequence(expr_list=fwd_buckets[j], mask=mask_out))
                for j in range(reduce_factor):
                    next_inputs.append(ExpressionSequence(expr_list=bwd_buckets[j], mask=mask_out))
                es_list = next_inputs
            else:
                raise RuntimeError(
                    f"unknown downsampling_method {self.downsampling_method}"
                )

        # One final state per layer pair: forward and backward parts concatenated.
        collected = []
        for fwd_builder, bwd_builder in self.builder_layers:
            fwd_state = fwd_builder.get_final_states()[0]
            bwd_state = bwd_builder.get_final_states()[0]
            main = dy.concatenate([fwd_state.main_expr(), bwd_state.main_expr()])
            cell = dy.concatenate([fwd_state.cell_expr(), bwd_state.cell_expr()])
            collected.append(FinalTransducerState(main, cell))
        self._final_states = collected

        return ret_es
Ejemplo n.º 12
0
 def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
     """
     Apply ``self.transform`` to the input tensor and wrap the result.

     Args:
       src: the input expression sequence; only its ``as_tensor()`` is used

     Returns:
       ExpressionSequence built from the transformed tensor; its last element
       is also stored in ``self._final_states``
     """
     transformed = self.transform(src.as_tensor())
     output_seq = ExpressionSequence(expr_tensor=transformed)
     last_state = FinalTransducerState(output_seq[-1])
     self._final_states = [last_state]
     return output_seq
Ejemplo n.º 13
0
Archivo: lstm.py Proyecto: anhad13/xnmt
    def __call__(self, es, transitions):
        """
        Compose the input sequence following per-sentence shift/reduce transitions.

        Every transition list is extended with one extra shift (0) and one extra
        reduce (1). For each batch element the transitions are replayed on a
        stack: a shift pushes the next input element, a reduce pops the two top
        entries and pushes their composition, and any other value contributes a
        zero vector to the output.

        Args:
          es: input sequence; element ``count`` is reshaped to
            (batch_size, hidden_dim) and indexed by the batch position
          transitions: one list of transition codes per batch element

        Returns:
          ExpressionSequence with one expression per output step, each holding
          the outputs of all batch elements for that step. The h / c produced
          by the last reduce of every batch element are stored in
          ``self._final_states``.
        """
        transitions = [t + [0, 1] for t in transitions]
        transitions = np.array(transitions)
        Wl = dy.parameter(self.p_Wl)
        Wr = dy.parameter(self.p_Wr)
        b = dy.parameter(self.p_b)
        batch_size = len(transitions)
        ha = []
        self.hfinals = []
        self.cfinals = []
        for i in range(batch_size):
            hstack = []
            cstack = []
            htmp = []
            count = 0
            for action in transitions[i]:
                if action == 0:
                    # shift: push the next input element onto both stacks
                    e1 = dy.reshape(es[count],
                                    (batch_size, self.hidden_dim))[i]
                    count += 1
                    hstack.append(e1)
                    cstack.append(e1)
                elif action == 1:
                    # reduce: combine the two topmost stack entries
                    h1 = hstack.pop()
                    h2 = hstack.pop()
                    c1 = cstack.pop()
                    c2 = cstack.pop()
                    tmp = dy.affine_transform([b, Wl, h1, Wr, h2])
                    # tmp holds five consecutive slices of size hidden_dim.
                    i_gate = dy.pick_range(tmp, 0, self.hidden_dim)
                    fl_gate = dy.pick_range(tmp, self.hidden_dim,
                                            self.hidden_dim * 2)
                    fr_gate = dy.pick_range(tmp, self.hidden_dim * 2,
                                            self.hidden_dim * 3)
                    o_gate = dy.pick_range(tmp, self.hidden_dim * 3,
                                           self.hidden_dim * 4)
                    cell_inp = dy.pick_range(tmp, self.hidden_dim * 4,
                                             self.hidden_dim * 5)
                    i_gate = dy.tanh(i_gate)
                    cell_inp = dy.logistic(cell_inp)
                    fl_gate = dy.logistic(fl_gate)
                    fr_gate = dy.logistic(fr_gate)
                    o_gate = dy.logistic(o_gate)
                    c_t = dy.cmult(fl_gate, c1) + dy.cmult(
                        fr_gate, c2) + dy.cmult(i_gate, cell_inp)
                    h_t = dy.cmult(o_gate, dy.tanh(c_t))
                    cstack.append(c_t)
                    hstack.append(h_t)
                    htmp.append(h_t)
                else:
                    htmp.append(dy.zeros(self.hidden_dim))
            # The appended [0, 1] makes a reduce the last transition, so h_t / c_t
            # hold this batch element's final composition here.
            self.hfinals.append(h_t)
            self.cfinals.append(c_t)
            ha.append(htmp)

        self._final_states = [
            FinalTransducerState(dy.concatenate_to_batch(self.hfinals),
                                 dy.concatenate_to_batch(self.cfinals))
        ]
        # Regroup from per-batch-element lists to per-step lists.
        # NOTE(review): zip_longest fills missing steps with None, which
        # dy.concatenate presumably cannot handle - confirm that all batch
        # elements produce the same number of outputs.
        steps = [list(step) for step in zip_longest(*ha)]
        k = [
            dy.reshape(dy.concatenate(xx), (xx[0].dim()[0][0], len(xx)))
            for xx in steps
        ]
        return ExpressionSequence(expr_list=k)
Ejemplo n.º 14
0
    def __call__(self, embed_sent):
        """
        Encode the input, sample segment decisions and transduce every segment.

        For each batch element the encodings are buffered until a SEGMENT
        decision is met; the buffer is then passed to the segment transducer
        and its output stored. DELETE decisions skip the encoding entirely.
        The per-element outputs are padded to the same number of segments and
        batched together.

        Args:
          embed_sent: the embedded input; its first element provides the batch size

        Returns:
          ExpressionSequence built from the batched, padded segment outputs
        """
        batch_size = embed_sent[0].dim()[1]
        # Softmax + segment decision
        encodings = self.embed_encoder(embed_sent)
        extra_args = () if self.learn_segmentation else (self._src, )
        segment_decisions, segment_logsoftmaxes = self.sample_segmentation(
            encodings, batch_size, *extra_args)
        # Some checks
        assert len(encodings) == len(segment_decisions), \
               "Encoding={}, segment={}".format(len(encodings), len(segment_decisions))
        # The last segment decision should be equal to 1
        if len(segment_decisions) > 0:
            segment_decisions[-1] = numpy.ones(segment_decisions[-1].shape,
                                               dtype=int)
        # Per-batch-element bookkeeping
        buffers = [[] for _ in range(batch_size)]
        outputs = [[] for _ in range(batch_size)]
        last_segment = [-1] * batch_size
        length_prior = [0] * batch_size
        length_div = [0] * batch_size
        self.segment_transducer.set_input_size(batch_size, len(encodings))
        delete_code = SegmentingAction.DELETE.value
        segment_code = SegmentingAction.SEGMENT.value
        # Loop through all the frames (word / item) in input.
        for j, (encoding, decisions_at_j) in enumerate(
                six.moves.zip(encodings, segment_decisions)):
            # For each decision in the batch
            for i, raw_decision in enumerate(decisions_at_j):
                decision = int(raw_decision)
                if decision == delete_code:
                    continue
                # Append this batch element's encoding to its buffer
                buffers[i].append(dy.pick_batch_elem(encoding, i))
                if decision == segment_code:
                    segment = expression_sequence.ExpressionSequence(
                        expr_list=buffers[i])
                    outputs[i].append(
                        self.segment_transducer.transduce(segment))
                    buffers[i] = []
                    # Calculate length prior
                    segment_len = j - last_segment[i]
                    length_prior[i] += numpy.log(
                        poisson.pmf(segment_len, self.length_prior) + 1e-10)
                    length_div[i] += 1
                    last_segment[i] = j
                self.segment_transducer.next_item()
        # Average the accumulated log prior over the number of segments
        length_prior = [
            total / count for total, count in zip(length_prior, length_div)
        ]
        # Padding: every batch element gets the same number of segment columns
        max_col = max(len(xs) for xs in outputs)
        P0 = dy.vecInput(self.segment_transducer.encoder.hidden_dim)
        columns = []
        for segments in outputs:
            deficit = max_col - len(segments)
            if deficit > 0:
                segments.extend([P0] * deficit)
            columns.append(dy.concatenate_cols(segments))
        outputs = dy.concatenate_to_batch(columns)
        self.segment_decisions = segment_decisions
        self.segment_logsoftmaxes = segment_logsoftmaxes
        # Packing output together
        if self.train and self.learn_segmentation:
            self.segment_length_prior = dy.inputTensor(length_prior,
                                                       batched=True)
            if self.use_baseline:
                self.bs = [
                    self.baseline(dy.nobackprop(enc)) for enc in encodings
                ]
        if not self.train:
            self.set_report_input(segment_decisions)
        self._final_encoder_state = [FinalTransducerState(encodings[-1])]
        # Return the encoded batch by the size of [(encode,segment)] * batch_size
        return expression_sequence.ExpressionSequence(expr_tensor=outputs)