Example #1
0
  def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
    """Encode the input with three padded conv+ReLU stages and L2-normalise the result.

    The first two stages are each followed by a (1, 4) max-pool with stride
    (1, 2); the third stage is max-reduced over dimension 1. The pooled
    features are divided by their L2 norm (plus 1e-6) and reshaped to
    ``(self.num_filters[2],)`` per batch element.

    Args:
      src: input sequence; converted via ``as_tensor()``.

    Returns:
      ExpressionSequence wrapping the normalised feature tensor.
    """
    def conv_relu(inp, filters, stride):
      # "valid" 2D convolution with the same stride on both axes, then ReLU
      kernel = dy.parameter(filters)
      return dy.rectify(dy.conv2d(inp, kernel, stride=[stride, stride], is_valid=True))

    tensor = src.as_tensor()
    dims = tensor.dim()
    # Height/width are not used below; the reads are kept so that an input
    # with fewer than two dimensions still fails here.
    _src_height, _src_width = dims[0][0], dims[0][1]
    batch_size = dims[1]

    # stage 1: pad, convolve, pool
    stage1_in = padding(tensor, self.filter_width[0] + 3)
    conv1 = conv_relu(stage1_in, self.filters1, self.stride[0])
    pooled1 = dy.maxpooling2d(conv1, (1, 4), (1, 2), is_valid=True)

    # stage 2: pad, convolve, pool
    stage2_in = padding(pooled1, self.filter_width[1] + 3)
    conv2 = conv_relu(stage2_in, self.filters2, self.stride[1])
    pooled2 = dy.maxpooling2d(conv2, (1, 4), (1, 2), is_valid=True)

    # stage 3: pad, convolve, then max over dimension 1
    stage3_in = padding(pooled2, self.filter_width[2])
    conv3 = conv_relu(stage3_in, self.filters3, self.stride[2])
    pooled3 = dy.max_dim(conv3, d=1)

    # the epsilon keeps the division finite when the features are all zero
    norm = dy.l2_norm(pooled3) + 1e-6
    normalised = dy.cdiv(pooled3, norm)
    flat = dy.reshape(normalised, (self.num_filters[2],), batch_size=batch_size)

    return ExpressionSequence(expr_tensor=flat)
Example #2
0
    def __call__(self, es):
        """Run the stacked forward and backward layers over ``es``.

        The first layer of each direction reads ``es`` directly; every higher
        layer reads both the forward and the backward output of the layer
        below. The backward stream is kept in reversed order throughout.

        Stores one FinalTransducerState per layer in ``self._final_states``
        (forward and backward main/cell expressions concatenated).

        Args:
            es: input expression sequence; its ``mask`` is propagated.

        Returns:
            ExpressionSequence whose i-th element concatenates the forward
            output at i with the backward output at ``-i - 1``.
        """
        mask = es.mask
        num_layers = len(self.forward_layers)

        # bottom layer
        fwd_seq = self.forward_layers[0](es)
        bwd_seq_rev = self.backward_layers[0](ReversedExpressionSequence(es))

        # upper layers
        for depth in range(1, num_layers):
            fwd_inputs = [fwd_seq, ReversedExpressionSequence(bwd_seq_rev)]
            next_fwd = self.forward_layers[depth](fwd_inputs)
            bwd_inputs = [ReversedExpressionSequence(fwd_seq), bwd_seq_rev]
            bwd_out = self.backward_layers[depth](bwd_inputs)
            bwd_seq_rev = ExpressionSequence(bwd_out.as_list(), mask=mask)
            fwd_seq = next_fwd

        final_states = []
        for depth in range(num_layers):
            fwd_layer = self.forward_layers[depth]
            bwd_layer = self.backward_layers[depth]
            main = dy.concatenate([fwd_layer.get_final_states()[0].main_expr(),
                                   bwd_layer.get_final_states()[0].main_expr()])
            cell = dy.concatenate([fwd_layer.get_final_states()[0].cell_expr(),
                                   bwd_layer.get_final_states()[0].cell_expr()])
            final_states.append(FinalTransducerState(main, cell))
        self._final_states = final_states

        merged = [
            dy.concatenate([fwd_seq[pos], bwd_seq_rev[-pos - 1]])
            for pos in range(len(fwd_seq))
        ]
        return ExpressionSequence(expr_list=merged, mask=mask)
Example #3
0
    def transduce(self, sent: ExpressionSequence) -> ExpressionSequence:
        """Optionally add positional encodings, then run ``sent`` through all modules.

        When ``self.pos_encoding_type`` is set, an encoding is built
        ("trigonometric": sliced from the cached ``position_encoding_block``,
        which is (re)initialised with 20% headroom when too short;
        "embedding": produced by ``self.positional_embedder``) and combined
        with the input by addition when ``self.pos_encoding_combine == "add"``
        or by concatenation otherwise.

        Args:
            sent: input expression sequence.

        Returns:
            The output of the last module (or the encoded input when there
            are no modules).

        Raises:
            ValueError: if ``self.pos_encoding_type`` is set to an unknown value.
        """
        if self.pos_encoding_type:
            seq_len = len(sent)
            if self.pos_encoding_type == "trigonometric":
                block = self.position_encoding_block
                if block is None or block.shape[2] < seq_len:
                    enc_dim = (self.input_dim
                               if self.pos_encoding_combine == "add" else
                               self.pos_encoding_size)
                    self.initialize_position_encoding(int(seq_len * 1.2),
                                                      enc_dim)
                # re-read the attribute: it may just have been rebuilt
                encoding = dy.inputTensor(
                    self.position_encoding_block[0, :, :seq_len])
            elif self.pos_encoding_type == "embedding":
                encoding = self.positional_embedder.embed_sent(
                    seq_len).as_tensor()
            else:
                # Previously this check sat in an unreachable `elif`, so an
                # unknown type surfaced as an unbound `encoding` instead.
                raise ValueError(
                    f"unknown encoding type {self.pos_encoding_type}")

            if self.pos_encoding_combine == "add":
                combined = sent.as_tensor() + encoding
            else:  # concat
                combined = dy.concatenate([sent.as_tensor(), encoding])
            sent = ExpressionSequence(expr_tensor=combined, mask=sent.mask)

        for module in self.modules:
            sent = module.transduce(sent)
        self._final_states = [transducer.FinalTransducerState(sent[-1])]
        return sent
Example #4
0
  def transduce(self, es):
    """
    Run the stacked layers over ``es`` with summed (residual) connections.

    The first layer is applied directly. Every following layer's output is
    summed element-wise with its input via ``self._sum_lists``. Finally the
    last layer is applied to the running sequence; its output is summed with
    its input only when ``self.add_to_output`` is set.

    One final state per applied layer is appended to ``self._final_states``.

    Args:
      es: the input expression sequence

    Returns:
      the output expression sequence
    """
    bottom = self.builder_layers[0]
    es = bottom(es)
    self._final_states = [bottom.get_final_states()[0]]

    if len(self.builder_layers) == 1:
      return es

    # NOTE(review): this slice includes builder_layers[-1], which is applied
    # again below -- confirm whether [1:-1] was intended.
    for layer in self.builder_layers[1:]:
      summed = self._sum_lists(layer(es), es)
      es = ExpressionSequence(expr_list=summed)
      state = FinalTransducerState(es[-1], layer.get_final_states()[0].cell_expr())
      self._final_states.append(state)

    top = self.builder_layers[-1]
    last_output = top(es)

    if not self.add_to_output:
      self._final_states.append(top.get_final_states()[0])
      return last_output

    state = FinalTransducerState(last_output[-1], top.get_final_states()[0].cell_expr())
    self._final_states.append(state)
    return ExpressionSequence(expr_list=self._sum_lists(last_output, es))
Example #5
0
    def transduce(self, embed_sent: ExpressionSequence) -> ExpressionSequence:
        """Apply a stack of biased 2D convolutions over the zero-padded sentence.

        The input tensor is padded with ``(window_receptor - 1) // 2`` zero
        columns on each side along dimension 1, passed through a first
        convolution, the hidden convolutions in ``self.builder_layers`` and a
        final convolution. The configured non-linearity follows every
        convolution except the last.

        Args:
            embed_sent: embedded input sentence; converted via ``as_tensor()``.

        Returns:
            ExpressionSequence wrapping the output reshaped to
            ``(sent_len, self.output_dim)``. Its last element is also stored
            in ``self._final_states``.
        """
        def activate(expr):
            # Compare by value: `is` on string literals relies on interning.
            # 'linear' and any unrecognised name leave the input unchanged.
            kind = self.non_linearity
            if kind == 'tanh':
                return dy.tanh(expr)
            if kind == 'relu':
                return dy.rectify(expr)
            if kind == 'sigmoid':
                return dy.logistic(expr)
            return expr

        src = embed_sent.as_tensor()

        sent_len = src.dim()[0][1]
        batch_size = src.dim()[1]
        # Integer division keeps the pad size usable as a tensor dimension.
        # TODO adapt it also for even window size
        pad_size = (self.window_receptor - 1) // 2

        src = dy.concatenate([
            dy.zeroes((self.input_dim, pad_size), batch_size=batch_size),
            src,
            dy.zeroes((self.input_dim, pad_size), batch_size=batch_size),
        ], d=1)
        padded_sent_len = sent_len + 2 * pad_size

        conv1 = dy.parameter(self.pConv1)
        bias1 = dy.parameter(self.pBias1)
        # add a trailing channel dimension of size 1 for conv2d
        src_chn = dy.reshape(src, (self.input_dim, padded_sent_len, 1),
                             batch_size=batch_size)
        cnn_layer1 = dy.conv2d_bias(src_chn, conv1, bias1, stride=[1, 1])

        hidden_shape = (self.internal_dim, sent_len, 1)
        hidden_layer = activate(
            dy.reshape(cnn_layer1, hidden_shape, batch_size=batch_size))

        for conv_hid, bias_hid in self.builder_layers:
            hidden_layer = dy.conv2d_bias(hidden_layer,
                                          dy.parameter(conv_hid),
                                          dy.parameter(bias_hid),
                                          stride=[1, 1])
            hidden_layer = activate(
                dy.reshape(hidden_layer, hidden_shape, batch_size=batch_size))

        last_conv = dy.parameter(self.last_conv)
        last_bias = dy.parameter(self.last_bias)
        output = dy.conv2d_bias(hidden_layer,
                                last_conv,
                                last_bias,
                                stride=[1, 1])
        output = dy.reshape(output, (sent_len, self.output_dim),
                            batch_size=batch_size)
        output_seq = ExpressionSequence(expr_tensor=output)
        self._final_states = [FinalTransducerState(output_seq[-1])]
        return output_seq
Example #6
0
 def transduce(self, seq: ExpressionSequence) -> ExpressionSequence:
   """Add the child transducer's output to its input, optionally layer-normalised.

   Args:
     seq: input expression sequence.

   Returns:
     ExpressionSequence wrapping ``child(seq) + seq``; when
     ``self.layer_norm`` is set, layer normalisation is applied to each
     column separately by temporarily folding dimension 1 into the batch.
   """
   child_out = self.child.transduce(seq).as_tensor()
   residual = child_out + seq.as_tensor()
   if not self.layer_norm:
     return ExpressionSequence(expr_tensor=residual)

   shape, batch_size = residual.dim()[0], residual.dim()[1]
   # fold dimension 1 into the batch so every column is normalised on its own
   folded = dy.reshape(residual, (shape[0],), batch_size=shape[1] * batch_size)
   normed = dy.layer_norm(folded, self.ln_g, self.ln_b)
   # restore the original shape and batch size
   restored = dy.reshape(normed, shape, batch_size=batch_size)
   return ExpressionSequence(expr_tensor=restored)
Example #7
0
    def transduce(self, expr_seq: ExpressionSequence) -> ExpressionSequence:
        """Apply multi-head scaled dot-product self-attention to the sequence.

        Args:
            expr_seq: input expression sequence; read via
                ``as_transposed_tensor()``.

        Returns:
            ExpressionSequence built from the transposed output tensor,
            carrying the input's mask. Its last element is stored in
            ``self._final_states``.
        """
        weight_params = (self.pWq, self.pWk, self.pWv, self.pWo)
        bias_params = (self.pbq, self.pbk, self.pbv, self.pbo)
        Wq, Wk, Wv, Wo = (dy.parameter(p) for p in weight_params)
        bq, bk, bv, bo = (dy.parameter(p) for p in bias_params)

        # Per the original note: a [(length, model_size) x batch] tensor
        inputs = expr_seq.as_transposed_tensor()
        in_dim = inputs.dim()
        x_len = in_dim[0][0]
        x_batch = in_dim[1]

        # Query, key and value projections
        # TODO: do we need bias broadcasting in DyNet?
        queries = bq + inputs * Wq
        keys = bk + inputs * Wk
        values = bv + inputs * Wv

        # Move the heads into the batch dimension
        head_shape = (x_len, self.head_dim)
        head_batch = x_batch * self.num_heads
        queries = dy.reshape(queries, head_shape, batch_size=head_batch)
        keys = dy.reshape(keys, head_shape, batch_size=head_batch)
        values = dy.reshape(values, head_shape, batch_size=head_batch)

        # Scaled dot product; rows are queries, columns are keys
        attn_score = queries * dy.transpose(keys) / sqrt(self.head_dim)
        seq_mask = expr_seq.mask
        if seq_mask is not None:
            # repeat the mask once per head, then push masked scores to -1e10
            per_head = np.repeat(seq_mask.np_arr, self.num_heads, axis=0)
            penalty = dy.inputTensor(per_head.transpose(),
                                     batched=True) * -1e10
            attn_score = attn_score + penalty
        attn_prob = dy.softmax(attn_score, d=1)

        # Weighted sum of values, with heads merged back out of the batch
        context = dy.reshape(attn_prob * values, (x_len, self.input_dim),
                             batch_size=x_batch)
        # Final output projection
        projected = bo + context * Wo

        result = ExpressionSequence(expr_transposed_tensor=projected,
                                    mask=expr_seq.mask)
        self._final_states = [FinalTransducerState(result[-1], None)]
        return result
Example #8
0
 def exprseq_pooling(self, exprseq):
   """Reduce an expression sequence to a single vector by max pooling.

   When a mask is present, -1e10 is added through
   ``mask.add_to_tensor_expr`` before pooling (presumably so that masked
   positions never win the max -- confirm against the mask class).
   A missing mask is now tolerated instead of raising AttributeError.

   Args:
     exprseq: the expression sequence to pool.

   Returns:
     The max over dimension 1 when the tensor has more than one dimension,
     otherwise the tensor itself.
   """
   tensor = exprseq.as_tensor()
   if exprseq.mask is not None:
     tensor = exprseq.mask.add_to_tensor_expr(tensor, -1e10)
   exprseq = ExpressionSequence(expr_tensor=tensor, mask=exprseq.mask)
   # identity check: `!= None` would go through the expression's operators
   if exprseq.expr_tensor is not None:
     if len(exprseq.expr_tensor.dim()[0]) > 1:
       return dy.max_dim(exprseq.expr_tensor, d=1)
     return exprseq.expr_tensor
   # fallback for a sequence that only holds a list of expressions
   return dy.emax(exprseq.expr_list)
Example #9
0
 def transduce(self, x: ExpressionSequence) -> ExpressionSequence:
   """Pool the sequence with softmax attention weights.

   Scores are the transposed input multiplied by ``self.W``; a mask (if any)
   is added with multiplicator -100.0, and the scores are multiplied
   element-wise by the leading rows of ``self.pos_enc`` when
   ``self.pos_enc_max`` is set.

   Args:
     x: input expression sequence.

   Returns:
     ExpressionSequence (no mask) wrapping ``x.as_tensor() * softmax(scores)``.
   """
   transposed = x.as_transposed_tensor()
   scores = transposed * dy.parameter(self.W)

   mask = x.mask
   if mask is not None:
     scores = mask.add_to_tensor_expr(scores, multiplicator=-100.0, time_first=True)

   if self.pos_enc_max:
     n_steps = transposed.dim()[0][0]
     # only the first n_steps rows of the position table are needed
     position_weights = dy.inputTensor(self.pos_enc[:n_steps, :])
     scores = dy.cmult(scores, position_weights)

   weights = dy.softmax(scores)
   pooled = x.as_tensor() * weights
   return expression_sequence.ExpressionSequence(expr_tensor=pooled, mask=None)
Example #10
0
 def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
   """Combine the input with learned positional embeddings.

   The first ``len(src)`` columns of ``self.embedder`` are selected and either
   added to the input (``self.op == 'sum'``) or concatenated with it
   (``self.op == 'concat'``).

   Args:
     src: input expression sequence.

   Returns:
     ExpressionSequence carrying the input's mask; its last element is stored
     in ``self._final_states``.

   Raises:
     ValueError: if ``self.op`` is neither 'sum' nor 'concat'.
   """
   sent_len = len(src)
   embeddings = dy.strided_select(dy.parameter(self.embedder), [1,1], [0,0], [self.input_dim, sent_len])
   if self.op == 'sum':
     output = embeddings + src.as_tensor()
   elif self.op == 'concat':
     output = dy.concatenate([embeddings, src.as_tensor()])
   else:
     # the message must read self.op; a bare `op` is undefined here
     raise ValueError(f'Illegal op {self.op} in PositionalTransducer (options are "sum"/"concat")')
   output_seq = ExpressionSequence(expr_tensor=output, mask=src.mask)
   self._final_states = [FinalTransducerState(output_seq[-1])]
   return output_seq
Example #11
0
  def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
    """Apply an affine projection and scale the result to unit L2 norm.

    Args:
      src: input sequence; converted via ``as_tensor()`` and reshaped to a
        single column of height ``dim()[0][0]``.

    Returns:
      ExpressionSequence wrapping ``(W*x + b) / ||W*x + b||``.
    """
    tensor = src.as_tensor()
    dims = tensor.dim()
    height = dims[0][0]
    batch_size = dims[1]

    W = dy.parameter(self.pW)
    b = dy.parameter(self.pb)

    # treat the input as one column vector per batch element
    column = dy.reshape(tensor, (height, 1), batch_size=batch_size)
    projected = (W * column) + b
    norm = dy.sqrt(dy.squared_norm(projected))
    return ExpressionSequence(expr_tensor=dy.cdiv(projected, norm))
Example #12
0
    def embed_sent(self, sent):
        """Embed every position of a sentence, or of a batch of sentences.

        Args:
            sent: generally a list of word IDs, but may be another per-word
                format. May also be batched, in which case all sentences
                must share one length and ``sent.mask`` is forwarded.

        Returns:
            ExpressionSequence with one embedding per position; the mask is
            ``sent.mask`` for batched input and None otherwise.
        """
        if xnmt.batcher.is_batched(sent):
            # minibatch mode: embed one batched "column" of words at a time
            seq_len = len(sent[0])
            assert all(len(single_sent) == seq_len for single_sent in sent)
            embeddings = [
                self.embed(
                    xnmt.batcher.mark_as_batch(
                        [single_sent[pos] for single_sent in sent]))
                for pos in range(seq_len)
            ]
            mask = sent.mask
        else:
            # single mode
            embeddings = [self.embed(word) for word in sent]
            mask = None

        return ExpressionSequence(expr_list=embeddings, mask=mask)
Example #13
0
    def __call__(self, expr_seq):
        """Transduce the sequence with a multi-layer LSTM, applying masks if given.

        Per the original author's note, masked timesteps simply copy the
        previous h / c.

        Args:
            expr_seq: expression sequence or list of expression sequences
                (where each inner list will be concatenated)

        Returns:
            ExpressionSequence of the last layer's hidden states, carrying
            the mask of the first input sequence. One FinalTransducerState
            (last h, last c) per layer is stored in ``self._final_states``.
        """
        # normalise to a list of sequences
        if isinstance(expr_seq, ExpressionSequence):
            expr_seq = [expr_seq]
        batch_size = expr_seq[0][0].dim()[1]
        seq_len = len(expr_seq[0])

        if self.dropout_rate > 0.0 and self.train:
            self.set_dropout_masks(batch_size=batch_size)

        cur_input = expr_seq
        self._final_states = []
        for layer_i in range(self.num_layers):
            # index 0 holds the zero initial state; outputs start at index 1
            h = [dy.zeroes(dim=(self.hidden_dim, ), batch_size=batch_size)]
            c = [dy.zeroes(dim=(self.hidden_dim, ), batch_size=batch_size)]
            for pos_i in range(seq_len):
                # one input expression per input sequence at this position
                x_t = [cur_input[j][pos_i] for j in range(len(cur_input))]
                # x_t was just built as a list, so neither branch below fires
                if isinstance(x_t, dy.Expression):
                    x_t = [x_t]
                elif type(x_t) != list:
                    x_t = list(x_t)
                if self.dropout_rate > 0.0 and self.train:
                    # apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights)
                    gates_t = dy.vanilla_lstm_gates_dropout_concat(
                        x_t, h[-1], self.Wx[layer_i], self.Wh[layer_i],
                        self.b[layer_i], self.dropout_mask_x[layer_i],
                        self.dropout_mask_h[layer_i],
                        self.weightnoise_std if self.train else 0.0)
                else:
                    gates_t = dy.vanilla_lstm_gates_concat(
                        x_t, h[-1], self.Wx[layer_i], self.Wh[layer_i],
                        self.b[layer_i],
                        self.weightnoise_std if self.train else 0.0)
                c_t = dy.vanilla_lstm_c(c[-1], gates_t)
                h_t = dy.vanilla_lstm_h(c_t, gates_t)
                # take the new state as-is when there is no mask or this
                # timestep's mask column sums to ~0
                if expr_seq[0].mask is None or np.isclose(
                        np.sum(expr_seq[0].mask.np_arr[:, pos_i:pos_i + 1]),
                        0.0):
                    c.append(c_t)
                    h.append(h_t)
                else:
                    # blend new and previous state per batch element;
                    # presumably the True/False flag selects the inverted vs.
                    # plain mask -- confirm in cmult_by_timestep_expr
                    c.append(expr_seq[0].mask.cmult_by_timestep_expr(
                        c_t, pos_i, True) +
                             expr_seq[0].mask.cmult_by_timestep_expr(
                                 c[-1], pos_i, False))
                    h.append(expr_seq[0].mask.cmult_by_timestep_expr(
                        h_t, pos_i, True) +
                             expr_seq[0].mask.cmult_by_timestep_expr(
                                 h[-1], pos_i, False))
            self._final_states.append(FinalTransducerState(h[-1], c[-1]))
            # this layer's outputs (minus the initial state) feed the next layer
            cur_input = [h[1:]]

        return ExpressionSequence(expr_list=h[1:], mask=expr_seq[0].mask)
Example #14
0
 def transduce(self, embeds):
     """Encode ``embeds`` column by column and return the final main state.

     Each column of ``embeds`` is selected and max-reduced over dimension 1,
     the resulting list is run through ``self.seq_transducer``, and the main
     expression of that transducer's last final state is returned.
     """
     n_cols = embeds.dim()[0][1]
     columns = [
         dy.max_dim(dy.select_cols(embeds, [col]), 1)
         for col in range(n_cols)
     ]
     # called for its side effect of populating the transducer's final states
     self.seq_transducer.transduce(ExpressionSequence(columns))
     return self.seq_transducer.get_final_states()[-1].main_expr()
Example #15
0
  def __call__(self, es):
    """
    Run the stacked forward/backward layer pairs over ``es``, downsampling
    between layers, and return the concatenated outputs of the last pair.

    Between layers the outputs are reduced by ``reduce_factor`` either by
    keeping every reduce_factor-th step ("skip") or by splitting them into
    reduce_factor interleaved streams that are all fed to the next layer
    ("concat").

    Args:
      es: an ExpressionSequence

    Returns:
      ExpressionSequence of forward/backward concatenations from the final
      layer pair. ``self._final_states`` receives one state per layer pair.

    Raises:
      ValueError: in "concat" mode when the sequence length is not a multiple
        of the layer's reduce factor.
      RuntimeError: for an unknown ``downsampling_method``.
    """

    # each layer receives a list of sequences; the first layer gets just the input
    es_list = [es]

    for layer_i, (fb, bb) in enumerate(self.builder_layers):
      reduce_factor = self._reduce_factor_for_layer(layer_i)
      if self.downsampling_method=="concat" and len(es_list[0]) % reduce_factor != 0:
        raise ValueError("For 'concat' subsampling, sequence lengths must be multiples of the total reduce factor. Configure batcher accordingly.")
      fs = fb(es_list)
      bs = bb([ReversedExpressionSequence(es_item) for es_item in es_list])
      if layer_i < len(self.builder_layers) - 1:
        if self.downsampling_method=="skip":
          # keep every reduce_factor-th step; the strided backward output is reversed again
          es_list = [ExpressionSequence(expr_list=fs[::reduce_factor]), ExpressionSequence(expr_list=bs[::reduce_factor][::-1])]
        elif self.downsampling_method=="concat":
          es_len = len(es_list[0])
          es_list_fwd = []
          es_list_bwd = []
          for i in range(0, es_len, reduce_factor):
            for j in range(reduce_factor):
              # the per-offset lists are created during the first outer pass
              if i==0:
                es_list_fwd.append([])
                es_list_bwd.append([])
              es_list_fwd[j].append(fs[i+j])
              # backward outputs are read starting from the end of bs
              es_list_bwd[j].append(bs[len(es_list[0])-reduce_factor+j-i])
          es_list = [ExpressionSequence(expr_list=es_list_fwd[j]) for j in range(reduce_factor)] + [ExpressionSequence(expr_list=es_list_bwd[j]) for j in range(reduce_factor)]
        else:
          raise RuntimeError("unknown downsampling_method %s" % self.downsampling_method)
      else:
        # concat final outputs
        # (ret_es is only bound here, i.e. on the last layer's iteration)
        ret_es = ExpressionSequence(expr_list=[dy.concatenate([f, b]) for f, b in zip(fs, ReversedExpressionSequence(bs))])

    # one final state per layer pair: forward and backward main/cell concatenated
    self._final_states = [FinalTransducerState(dy.concatenate([fb.get_final_states()[0].main_expr(),
                                                            bb.get_final_states()[0].main_expr()]),
                                            dy.concatenate([fb.get_final_states()[0].cell_expr(),
                                                            bb.get_final_states()[0].cell_expr()])) \
                          for (fb, bb) in self.builder_layers]

    return ret_es
 def __call__(self, src):
     """Encode ``src`` with one conv layer, stacked recurrent highway layers and attention pooling.

     Args:
         src: input sequence; converted via ``as_tensor()``.

     Returns:
         ExpressionSequence wrapping the attention-weighted combination of
         the last recurrent layer's per-timestep states.
     """
     src = src.as_tensor()
     # convolutional layer
     # (`padding` is a helper defined outside this block)
     src = padding(src,
                   src.dim()[0][0],
                   src.dim()[0][1], self.filter_width, self.stride,
                   src.dim()[1])
     l1 = dy.rectify(
         dy.conv2d(src,
                   dy.parameter(self.filter_conv),
                   stride=[self.stride, self.stride],
                   is_valid=True))
     timestep = l1.dim()[0][1]
     features = l1.dim()[0][2]
     batch_size = l1.dim()[1]
     # reshape l1 to (timestep, features), keeping the batch size
     # (assumes the first dimension of l1 is 1 -- TODO confirm)
     rhn_in = dy.reshape(l1, (timestep, features), batch_size=batch_size)
     # one expression per timestep
     rhn_in = [dy.pick(rhn_in, i) for i in range(timestep)]
     for l in range(self.rhn_num_hidden_layers):
         rhn_out = []
         # the initial state of each layer is the parameter self.init[l]
         prev_state = dy.parameter(self.init[l])
         # begin recurrent highway network
         for t in range(timestep):
             for m in range(0, self.rhn_microsteps):
                 # H: candidate state, T: transform gate, both computed from prev_state
                 H = dy.affine_transform([
                     dy.parameter(self.recur[l][m][1]),
                     dy.parameter(self.recur[l][m][0]), prev_state
                 ])
                 T = dy.affine_transform([
                     dy.parameter(self.recur[l][m][3]),
                     dy.parameter(self.recur[l][m][2]), prev_state
                 ])
                 # the external input only enters at the first microstep
                 if m == 0:
                     H += dy.parameter(self.linear[l][0]) * rhn_in[t]
                     T += dy.parameter(self.linear[l][1]) * rhn_in[t]
                 H = dy.tanh(H)
                 T = dy.logistic(T)
                 # highway update: gate between the previous state and the candidate
                 prev_state = dy.cmult(1 - T, prev_state) + dy.cmult(
                     T, H)  # ((1024, ), batch_size)
             rhn_out.append(prev_state)
         # residual connection from the layer input (all layers but the first)
         if self.residual and l > 0:
             rhn_out = [sum(x) for x in zip(rhn_out, rhn_in)]
         rhn_in = rhn_out
     # Compute the attention-weighted average of the activations
     rhn_in = dy.concatenate_cols(rhn_in)
     scores = dy.transpose(dy.parameter(self.attention[0][1])) * dy.tanh(
         dy.parameter(self.attention[0][0]) *
         rhn_in)  # ((1,510), batch_size)
     # flatten the scores to a vector so softmax runs over timesteps
     scores = dy.reshape(scores, (scores.dim()[0][1], ),
                         batch_size=scores.dim()[1])
     attn_out = rhn_in * dy.softmax(
         scores
     )  # # rhn_in.as_tensor() is ((1024,510), batch_size) softmax is ((510,), batch_size)
     return ExpressionSequence(expr_tensor=attn_out)
Example #17
0
  def transduce(self, es):
    """Run a bidirectional layer followed by the residual network.

    The forward layer reads ``es`` and the backward layer reads it reversed.
    Their per-position outputs are concatenated and passed to
    ``self.residual_network``.

    ``self._final_states`` holds the concatenated forward/backward final
    state first, followed by the residual network's final states.

    Args:
      es: input expression sequence.

    Returns:
      The residual network's output.
    """
    fwd_out = self.forward_layer(es)
    bwd_out = self.backward_layer(ReversedExpressionSequence(es))

    main = dy.concatenate([self.forward_layer.get_final_states()[0].main_expr(),
                           self.backward_layer.get_final_states()[0].main_expr()])
    cell = dy.concatenate([self.forward_layer.get_final_states()[0].cell_expr(),
                           self.backward_layer.get_final_states()[0].cell_expr()])
    self._final_states = [FinalTransducerState(main, cell)]

    # pair each forward output with the backward output for the same position
    merged = [dy.concatenate([f, b])
              for f, b in zip(fwd_out, ReversedExpressionSequence(bwd_out))]
    output = self.residual_network.transduce(ExpressionSequence(expr_list=merged))
    self._final_states += self.residual_network.get_final_states()
    return output
Example #18
0
 def transduce(self,
               embed_sent: ExpressionSequence) -> List[ExpressionSequence]:
     """Sample segmentations, compose each segment, and encode the composed sequences.

     For every batch element and every sampled segmentation, the embedded
     sequence is cut at the sampled boundaries. All segments are handed to
     ``self.segment_composer.compose``. Each sample's composed batch is then
     padded and run through ``self.final_transducer``.

     Bookkeeping attributes (``compose_output``, ``segment_actions`` and, with
     a length prior, ``seg_size_unpadded``) are stored even if encoding fails.

     Args:
         embed_sent: embedded input sequence.

     Returns:
         CompoundSeqExpression over the per-sample encoder outputs.
     """
     batch_size = embed_sent[0].dim()[1]
     actions = self.sample_segmentation(embed_sent, batch_size)
     sample_size = len(actions)
     embeddings = dy.concatenate(embed_sent.expr_list, d=1)
     # NOTE(review): forces evaluation of the graph here; the result is
     # unused -- confirm whether this is still needed.
     embeddings.value()

     composed_words = []
     for i in range(batch_size):
         sequence = dy.pick_batch_elem(embeddings, i)
         # For each sampled segmentation
         for j, sample in enumerate(actions):
             lower_bound = 0
             # Read every 'segment' decision
             for k, upper_bound in enumerate(sample[i]):
                 # build the slice once instead of creating two identical nodes
                 char_sequence = dy.pick_range(sequence, lower_bound,
                                               upper_bound + 1, 1)
                 composed_words.append(
                     (char_sequence, j, i, k, lower_bound, upper_bound + 1))
                 lower_bound = upper_bound + 1
     outputs = self.segment_composer.compose(composed_words, sample_size,
                                             batch_size)
     # Bound before the try so the finally block cannot raise
     # UnboundLocalError and hide the original exception.
     seg_size_unpadded = None
     # Padding + return
     try:
         if self.length_prior:
             seg_size_unpadded = [[
                 len(outputs[i][j]) for j in range(batch_size)
             ] for i in range(sample_size)]
         enc_outputs = []
         for batched_sampled_sentence in outputs:
             sampled_sentence, segment_mask = self.pad(
                 batched_sampled_sentence)
             expr_seq = ExpressionSequence(
                 expr_tensor=dy.concatenate_to_batch(sampled_sentence),
                 mask=segment_mask)
             sent_context = self.final_transducer.transduce(expr_seq)
             self.final_states.append(
                 self.final_transducer.get_final_states())
             enc_outputs.append(sent_context)
         return CompoundSeqExpression(enc_outputs)
     finally:
         if self.length_prior:
             self.seg_size_unpadded = seg_size_unpadded
         self.compose_output = outputs
         self.segment_actions = actions
         if not self.train and self.compute_report:
             self.add_sent_for_report({"segment_actions": actions})
Example #19
0
    def transduce(self, x: ExpressionSequence) -> ExpressionSequence:
        """Apply self-attention followed by the feed-forward sublayer.

        When ``self.diagonal_mask_width`` is set, an attention mask of ones
        is built with zeroed square blocks along the diagonal. With
        ``self.downsample_factor > 1`` the output length becomes
        ``ceil(len(x) / downsample_factor)`` and the mask is subsampled to
        match.

        Args:
            x: input expression sequence.

        Returns:
            ExpressionSequence of the (possibly shortened) output with the
            matching mask. The flat output is kept in ``self._recent_output``.
        """
        seq_len = len(x)
        batch_size = x[0].dim()[1]

        att_mask = None
        if self.diagonal_mask_width is not None:
            half_width = self.diagonal_mask_width // 2
            att_mask = np.ones((seq_len, seq_len))
            for i in range(seq_len):
                lo = max(0, i - half_width)
                hi = min(seq_len, i + half_width + 1)
                att_mask[lo:hi, lo:hi] = 0.0

        batch_mask = x.mask.np_arr if x.mask else None
        mid = self.self_attn(x=x,
                             att_mask=att_mask,
                             batch_mask=batch_mask,
                             p=self.dropout)

        downsampling = self.downsample_factor > 1
        if downsampling:
            seq_len = int(math.ceil(seq_len / float(self.downsample_factor)))
        hidden_dim = mid.dim()[0][0]

        out_mask = x.mask
        if downsampling and out_mask is not None:
            out_mask = out_mask.lin_subsampled(
                reduce_factor=self.downsample_factor)

        if self.ff_lstm:
            # the LSTM feed-forward wants a sequence, so unfold time from the batch
            as_sequence = dy.reshape(mid, (hidden_dim, seq_len),
                                     batch_size=batch_size)
            ff_out = self.feed_forward.transduce(
                ExpressionSequence(expr_tensor=as_sequence, mask=out_mask))
            # fold time back into the batch dimension
            out = dy.reshape(ff_out.as_tensor(), (hidden_dim, ),
                             batch_size=seq_len * batch_size)
        else:
            out = self.feed_forward.transduce(mid, p=self.dropout)

        self._recent_output = out
        unfolded = dy.reshape(out, (out.dim()[0][0], seq_len),
                              batch_size=batch_size)
        return ExpressionSequence(expr_tensor=unfolded, mask=out_mask)
Example #20
0
  def embed_sent(self, sent):
    """Embed each position of a sentence or of a batch of equal-length sentences.

    Returns an ExpressionSequence with one embedding per position; batched
    input forwards ``sent.mask``, unbatched input gets no mask.
    """
    # single mode
    if not xnmt.batcher.is_batched(sent):
      return ExpressionSequence(expr_list=[self.embed(word) for word in sent], mask=None)

    # minibatch mode: all sentences must share one length
    seq_len = len(sent[0])
    assert all(len(single_sent) == seq_len for single_sent in sent)
    embeddings = []
    for pos in range(seq_len):
      words_at_pos = [single_sent[pos] for single_sent in sent]
      embeddings.append(self.embed(xnmt.batcher.mark_as_batch(words_at_pos)))
    return ExpressionSequence(expr_list=embeddings, mask=sent.mask)
Example #21
0
 def embed_sent(self, sent):
   """Embed a sentence or batch, using lazy array data when the input offers it.

   Inputs whose (first) sentence has a ``get_array`` method are wrapped in a
   LazyNumpyExpressionSequence without embedding. Everything else is embedded
   position by position via ``self.embed``.

   Args:
     sent: a single sentence or a batch of sentences.

   Returns:
     A LazyNumpyExpressionSequence or an ExpressionSequence. Batched input
     carries ``sent.mask``; unbatched input carries its ``mask`` attribute if
     it has one, else None (previously a missing attribute raised
     AttributeError).
   """
   # TODO refactor: seems a bit too many special cases that need to be distinguished
   batched = xnmt.batcher.is_batched(sent)
   first_sent = sent[0] if batched else sent
   if hasattr(first_sent, "get_array"):
     if not batched:
       return LazyNumpyExpressionSequence(lazy_data=sent.get_array())
     return LazyNumpyExpressionSequence(lazy_data=xnmt.batcher.mark_as_batch(list(sent)),
                                        mask=sent.mask)

   if batched:
     embeddings = [
       self.embed(xnmt.batcher.mark_as_batch([single_sent[word_i] for single_sent in sent]))
       for word_i in range(sent.sent_len())
     ]
     mask = sent.mask
   else:
     embeddings = [self.embed(word) for word in sent]
     # plain sentences need not define a mask
     mask = getattr(sent, "mask", None)
   return ExpressionSequence(expr_list=embeddings, mask=mask)
Example #22
0
    def transduce(self, embed_sent):
        """Apply an affine transform and the configured non-linearity.

        Args:
            embed_sent: input sequence; converted via ``as_tensor()``.

        Returns:
            ExpressionSequence wrapping the activated output. Its last
            element is stored in ``self._final_states``. 'linear' and any
            unrecognised ``self.nonlinearity`` leave the affine output as is.
        """
        src = embed_sent.as_tensor()

        W = dy.parameter(self.pW)
        b = dy.parameter(self.pb)

        l1 = dy.affine_transform([b, W, src])
        # Compare by value: `is` on string literals relies on interning and
        # can silently skip the activation.
        kind = self.nonlinearity
        if kind == 'sigmoid':
            output = dy.logistic(l1)
        elif kind == 'tanh':
            # NOTE(review): 2*sigmoid(x) - 1 equals tanh(x/2), not tanh(x);
            # kept as is so existing models behave the same -- confirm intent.
            output = 2 * dy.logistic(l1) - 1
        elif kind == 'relu':
            output = dy.rectify(l1)
        else:
            output = l1
        output_seq = ExpressionSequence(expr_tensor=output)
        self._final_states = [FinalTransducerState(output_seq[-1])]
        return output_seq
Example #23
0
    def __call__(self, es):
        """Run the stacked forward/backward layer pairs over ``es`` with downsampling.

        Between layers the outputs are reduced by ``reduce_factor`` either by
        keeping every reduce_factor-th step ("skip") or by splitting them
        into reduce_factor interleaved streams ("concat"). In "concat" mode
        the inputs are first extended with zero vectors until their length is
        a multiple of the reduce factor.

        Args:
            es: an ExpressionSequence

        Returns:
            ExpressionSequence of forward/backward concatenations from the
            final layer pair. ``self._final_states`` receives one state per
            layer pair.

        Raises:
            RuntimeError: for an unknown ``downsampling_method``.
        """

        # each layer receives a list of sequences; the first layer gets just the input
        es_list = [es]
        # cached zero vector used for padding; rebuilt when the dimension changes
        zero_pad = None
        batch_size = es_list[0][0].dim()[1]

        for layer_i, (fb, bb) in enumerate(self.builder_layers):
            reduce_factor = self._reduce_factor_for_layer(layer_i)
            # append one zero step to every sequence per pass until the length divides evenly
            while self.downsampling_method == "concat" and len(
                    es_list[0]) % reduce_factor != 0:
                for es_i in range(len(es_list)):
                    expr_list = es_list[es_i].as_list()
                    if zero_pad is None or zero_pad.dim(
                    )[0][0] != expr_list[0].dim()[0][0]:
                        zero_pad = dy.zeros(dim=expr_list[0].dim()[0][0],
                                            batch_size=batch_size)
                    expr_list.append(zero_pad)
                    # the rebuilt sequence is created without a mask
                    es_list[es_i] = ExpressionSequence(expr_list=expr_list)
            fs = fb(es_list)
            bs = bb(
                [ReversedExpressionSequence(es_item) for es_item in es_list])
            if layer_i < len(self.builder_layers) - 1:
                if self.downsampling_method == "skip":
                    # keep every reduce_factor-th step; the strided backward output is reversed again
                    es_list = [
                        ExpressionSequence(expr_list=fs[::reduce_factor]),
                        ExpressionSequence(expr_list=bs[::reduce_factor][::-1])
                    ]
                elif self.downsampling_method == "concat":
                    es_len = len(es_list[0])
                    es_list_fwd = []
                    es_list_bwd = []
                    for i in range(0, es_len, reduce_factor):
                        for j in range(reduce_factor):
                            # the per-offset lists are created during the first outer pass
                            if i == 0:
                                es_list_fwd.append([])
                                es_list_bwd.append([])
                            es_list_fwd[j].append(fs[i + j])
                            # backward outputs are read starting from the end of bs
                            es_list_bwd[j].append(bs[len(es_list[0]) -
                                                     reduce_factor + j - i])
                    es_list = [
                        ExpressionSequence(expr_list=es_list_fwd[j])
                        for j in range(reduce_factor)
                    ] + [
                        ExpressionSequence(expr_list=es_list_bwd[j])
                        for j in range(reduce_factor)
                    ]
                else:
                    raise RuntimeError("unknown downsampling_method %s" %
                                       self.downsampling_method)
            else:
                # concat final outputs
                # (ret_es is only bound here, i.e. on the last layer's iteration)
                ret_es = ExpressionSequence(expr_list=[
                    dy.concatenate([f, b])
                    for f, b in zip(fs, ReversedExpressionSequence(bs))
                ])

        # one final state per layer pair: forward and backward main/cell concatenated
        self._final_states = [FinalTransducerState(dy.concatenate([fb.get_final_states()[0].main_expr(),
                                                                bb.get_final_states()[0].main_expr()]),
                                                dy.concatenate([fb.get_final_states()[0].cell_expr(),
                                                                bb.get_final_states()[0].cell_expr()])) \
                              for (fb, bb) in self.builder_layers]

        return ret_es
Example #24
0
File: lstm.py Project: anhad13/xnmt
    def __call__(self, es, transitions):
        """
    Compose the inputs bottom-up with a binary tree-LSTM cell, following one
    shift/reduce transition sequence per batch element.

    Args:
      es: input sequence; ``es[t]`` is reshaped to ``(batch_size, hidden_dim)``
          and row ``i`` is used as the ``t``-th shifted leaf of batch element ``i``.
      transitions: one list per batch element; ``0`` = shift, ``1`` = reduce,
                   any other value is treated as padding. A closing shift+reduce
                   pair (``[0, 1]``) is appended to every list.
    Returns:
      ExpressionSequence with one ``(hidden_dim, batch_size)`` expression per
      output step (one step per reduce or padding transition).
    Raises:
      IndexError: if a reduce is requested while fewer than two items are on
                  the stack.

    Side effects: sets ``self.hfinals``, ``self.cfinals`` and
    ``self._final_states``.
    """
        # Kept as plain (possibly ragged) Python lists: converting to np.array is
        # not needed for the element-wise indexing below, and raises ValueError
        # for ragged input on NumPy >= 1.24.
        transitions = [t + [0, 1] for t in transitions]
        Wl = dy.parameter(self.p_Wl)
        Wr = dy.parameter(self.p_Wr)
        b = dy.parameter(self.p_b)
        batch_size = len(transitions)
        hid = self.hidden_dim

        ha = []
        self.hfinals = []
        self.cfinals = []
        for i in range(batch_size):
            hstack = []
            cstack = []
            htmp = []
            count = 0  # index of the next input vector to shift
            for action in transitions[i]:
                if action == 0:
                    # shift: push the next leaf as both hidden and cell state
                    e1 = dy.reshape(es[count], (batch_size, hid))[i]
                    count += 1
                    hstack.append(e1)
                    cstack.append(e1)
                elif action == 1:
                    # reduce: merge the two topmost stack entries
                    h1 = hstack.pop()
                    h2 = hstack.pop()
                    c1 = cstack.pop()
                    c2 = cstack.pop()
                    tmp = dy.affine_transform([b, Wl, h1, Wr, h2])
                    # NOTE(review): tanh is applied to the slice named i_gate and
                    # logistic to cell_inp, the reverse of the usual naming; only
                    # their product is used, so this is left unchanged.
                    i_gate = dy.tanh(dy.pick_range(tmp, 0, hid))
                    fl_gate = dy.logistic(dy.pick_range(tmp, hid, hid * 2))
                    fr_gate = dy.logistic(dy.pick_range(tmp, hid * 2, hid * 3))
                    o_gate = dy.logistic(dy.pick_range(tmp, hid * 3, hid * 4))
                    cell_inp = dy.logistic(dy.pick_range(tmp, hid * 4, hid * 5))
                    c_t = dy.cmult(fl_gate, c1) + dy.cmult(
                        fr_gate, c2) + dy.cmult(i_gate, cell_inp)
                    h_t = dy.cmult(o_gate, dy.tanh(c_t))
                    cstack.append(c_t)
                    hstack.append(h_t)
                    htmp.append(h_t)
                else:
                    # padding transition: emit a zero vector
                    htmp.append(dy.zeros(hid))
            # The last transition is always the appended reduce, so the top of
            # each stack holds the root state of this batch element.
            self.hfinals.append(hstack[-1])
            self.cfinals.append(cstack[-1])
            ha.append(htmp)

        self._final_states = [
            FinalTransducerState(dy.concatenate_to_batch(self.hfinals),
                                 dy.concatenate_to_batch(self.cfinals))
        ]
        # Transpose from per-sentence to per-step lists. Sentences with fewer
        # output steps are padded with zero vectors (same convention as the
        # padding branch above) instead of None, which dy.concatenate rejects.
        steps = zip_longest(*ha, fillvalue=dy.zeros(hid))
        k = [
            dy.reshape(dy.concatenate(list(xx)), (xx[0].dim()[0][0], len(xx)))
            for xx in steps
        ]
        return ExpressionSequence(expr_list=k)
Example #25
0
    def __call__(self, x: dy.Expression, att_mask: np.ndarray,
                 batch_mask: np.ndarray, p: float):
        """
    Apply one multi-head self-attention block (optional downsampling, masking,
    dropout, residual connection and layer norm).

    Args:
      x: input sequence of dimensions (input_dim, time) x batch. Despite the
         annotation it is used as an ExpressionSequence here (``x.mask``,
         ``x.as_tensor()``).
      att_mask: numpy array of dimensions (time, time); pre-transposed. May be None.
      batch_mask: numpy array of dimensions (batch, time). May be None.
      p: dropout prob; no dropout is applied when ``p <= 0``.
    Returns:
      The result of ``self.layer_norm`` applied to attention output + residual;
      the attention output is shaped (model_dim,) x (batch * time_out).
    Raises:
      ValueError: if downsampling is enabled and the sequence length is not a
                  multiple of ``self.downsample_factor``.
    """
        sent_len = x.dim()[0][1]
        batch_size = x[0].dim()[1]

        if self.downsample_factor > 1:
            if sent_len % self.downsample_factor != 0:
                raise ValueError(
                    "For 'reshape' downsampling, sequence lengths must be multiples of the downsampling factor. "
                    "Configure batcher accordingly.")
            if batch_mask is not None:
                batch_mask = batch_mask[:, ::self.downsample_factor]
            sent_len_out = sent_len // self.downsample_factor
            sent_len = sent_len_out
            out_mask = x.mask
            if out_mask is not None:
                out_mask = out_mask.lin_subsampled(
                    reduce_factor=self.downsample_factor)

            # Fold `downsample_factor` consecutive frames into the feature axis.
            # The time dimension must be an int, hence sent_len_out (floor
            # division) rather than a true-division float.
            x = ExpressionSequence(expr_tensor=dy.reshape(
                x.as_tensor(),
                (x.dim()[0][0] * self.downsample_factor, sent_len_out),
                batch_size=batch_size),
                                   mask=out_mask)
        else:
            sent_len_out = sent_len
        residual = SAAMTimeDistributed()(x)
        if self.model_dim != self.input_dim * self.downsample_factor:
            # project the shortcut so it can be added to the attention output
            residual = self.res_shortcut(residual)

        heads_dim = self.head_count * self.dim_per_head

        # Concatenate all the words together for doing vectorized affine transform
        if self.kq_pos_encoding_type is None:
            kvq_lin = self.linear_kvq(SAAMTimeDistributed()(x))
            key_up = self.shape_projection(
                dy.pick_range(kvq_lin, 0, heads_dim), batch_size)
            value_up = self.shape_projection(
                dy.pick_range(kvq_lin, heads_dim, 2 * heads_dim), batch_size)
            query_up = self.shape_projection(
                dy.pick_range(kvq_lin, 2 * heads_dim, 3 * heads_dim),
                batch_size)
        else:
            assert self.kq_pos_encoding_type == "embedding"
            # keys/queries see the input concatenated with position embeddings,
            # values are computed from the plain input
            encoding = self.kq_positional_embedder.embed_sent(
                sent_len).as_tensor()
            kq_lin = self.linear_kq(SAAMTimeDistributed()(ExpressionSequence(
                expr_tensor=dy.concatenate([x.as_tensor(), encoding]))))
            key_up = self.shape_projection(
                dy.pick_range(kq_lin, 0, heads_dim), batch_size)
            query_up = self.shape_projection(
                dy.pick_range(kq_lin, heads_dim, 2 * heads_dim), batch_size)
            v_lin = self.linear_v(SAAMTimeDistributed()(x))
            value_up = self.shape_projection(v_lin, batch_size)

        if self.cross_pos_encoding_type:
            assert self.cross_pos_encoding_type == "embedding"
            emb1 = dy.pick_range(dy.parameter(self.cross_pos_emb_p1), 0,
                                 sent_len)
            emb2 = dy.pick_range(dy.parameter(self.cross_pos_emb_p2), 0,
                                 sent_len)
            key_up = dy.reshape(key_up,
                                (sent_len, self.dim_per_head, self.head_count),
                                batch_size=batch_size)
            key_up = dy.concatenate_cols(
                [dy.cmult(key_up, emb1),
                 dy.cmult(key_up, emb2)])
            key_up = dy.reshape(key_up, (sent_len, self.dim_per_head * 2),
                                batch_size=self.head_count * batch_size)
            query_up = dy.reshape(
                query_up, (sent_len, self.dim_per_head, self.head_count),
                batch_size=batch_size)
            query_up = dy.concatenate_cols(
                [dy.cmult(query_up, emb2),
                 dy.cmult(query_up, -emb1)])
            query_up = dy.reshape(query_up, (sent_len, self.dim_per_head * 2),
                                  batch_size=self.head_count * batch_size)

        scaled = query_up * dy.transpose(
            key_up / math.sqrt(self.dim_per_head)
        )  # scale before the matrix multiplication to save memory

        # Apply Mask here
        if not self.ignore_masks:
            if att_mask is not None:
                att_mask_inp = att_mask * -100.0
                if self.downsample_factor > 1:
                    att_mask_inp = att_mask_inp[::self.downsample_factor, ::
                                                self.downsample_factor]
                scaled += dy.inputTensor(att_mask_inp)
            if batch_mask is not None:
                # reshape (batch, time) -> (time, head_count*batch), then *-100
                inp = np.resize(np.broadcast_to(batch_mask.T[:, np.newaxis, :],
                                                (sent_len, self.head_count, batch_size)),
                                (1, sent_len, self.head_count * batch_size)) \
                      * -100
                mask_expr = dy.inputTensor(inp, batched=True)
                scaled += mask_expr
            if self.diag_gauss_mask:
                # -(i - j)^2 / 2 for every position pair, replicated per head
                positions = np.arange(sent_len)
                sq_dist = -(positions[:, np.newaxis] -
                            positions[np.newaxis, :])**2 / 2.0
                diag_growing = np.repeat(sq_dist[:, :, np.newaxis],
                                         self.head_count,
                                         axis=2)
                e_diag_gauss_mask = dy.inputTensor(diag_growing)
                e_sigma = dy.parameter(self.diag_gauss_mask_sigma)
                if self.square_mask_std:
                    e_sigma = dy.square(e_sigma)
                e_sigma_sq_inv = dy.cdiv(
                    dy.ones(e_sigma.dim()[0], batch_size=batch_size),
                    dy.square(e_sigma))
                e_diag_gauss_mask_final = dy.cmult(e_diag_gauss_mask,
                                                   e_sigma_sq_inv)
                scaled += dy.reshape(e_diag_gauss_mask_final,
                                     (sent_len, sent_len),
                                     batch_size=batch_size * self.head_count)

        # Computing Softmax here.
        attn = dy.softmax(scaled, d=1)
        if LOG_ATTENTION:
            attn_val = attn.value()  # evaluate once, reuse for all log records
            yaml_logger.info({
                "key": "selfatt_mat_ax0",
                "value": np.average(attn_val, axis=0).dumps(),
                "desc": self.desc
            })
            yaml_logger.info({
                "key": "selfatt_mat_ax1",
                "value": np.average(attn_val, axis=1).dumps(),
                "desc": self.desc
            })
            yaml_logger.info({
                "key": "selfatt_mat_ax0_ent",
                "value": entropy(attn_val).dumps(),
                "desc": self.desc
            })
            yaml_logger.info({
                "key": "selfatt_mat_ax1_ent",
                "value": entropy(attn_val.transpose()).dumps(),
                "desc": self.desc
            })

        # NOTE(review): this unconditionally overwrites any configured value, so
        # the branch below always runs and zeroes out every head except head 0.
        # It looks like leftover experiment code -- confirm before relying on
        # multi-head behavior. Kept as-is to preserve current outputs.
        self.select_att_head = 0
        if self.select_att_head is not None:
            attn = dy.reshape(attn, (sent_len, sent_len, self.head_count),
                              batch_size=batch_size)
            sel_mask = np.zeros((1, 1, self.head_count))
            sel_mask[0, 0, self.select_att_head] = 1.0
            attn = dy.cmult(attn, dy.inputTensor(sel_mask))
            attn = dy.reshape(attn, (sent_len, sent_len),
                              batch_size=self.head_count * batch_size)

        # Applying dropout to attention
        if p > 0.0:
            drop_attn = dy.dropout(attn, p)
        else:
            drop_attn = attn

        # Computing weighted attention score
        attn_prod = drop_attn * value_up

        # Reshaping the attn_prod to input query dimensions
        out = dy.reshape(attn_prod,
                         (sent_len_out, self.dim_per_head * self.head_count),
                         batch_size=batch_size)
        out = dy.transpose(out)
        out = dy.reshape(out, (self.model_dim, ),
                         batch_size=batch_size * sent_len_out)
        #     out = dy.reshape_transpose_reshape(attn_prod, (sent_len_out, self.dim_per_head * self.head_count), (self.model_dim,), pre_batch_size=batch_size, post_batch_size=batch_size*sent_len_out)

        if self.plot_attention:
            # self.plot_attention doubles as the output file prefix
            from sklearn.metrics.pairwise import cosine_similarity
            assert batch_size == 1
            mats = []
            for i in range(attn.dim()[1]):
                mats.append(dy.pick_batch_elem(attn, i).npvalue())
                self.plot_att_mat(
                    mats[-1], "{}.sent_{}.head_{}.png".format(
                        self.plot_attention, self.plot_attention_counter, i),
                    300)
            avg_mat = np.average(mats, axis=0)
            self.plot_att_mat(
                avg_mat,
                "{}.sent_{}.head_avg.png".format(self.plot_attention,
                                                 self.plot_attention_counter),
                300)
            cosim_before = cosine_similarity(x.as_tensor().npvalue().T)
            self.plot_att_mat(
                cosim_before, "{}.sent_{}.cosim_before.png".format(
                    self.plot_attention, self.plot_attention_counter), 600)
            cosim_after = cosine_similarity(out.npvalue().T)
            self.plot_att_mat(
                cosim_after, "{}.sent_{}.cosim_after.png".format(
                    self.plot_attention, self.plot_attention_counter), 600)
            self.plot_attention_counter += 1

        # Adding dropout and layer normalization
        if p > 0.0:
            res = dy.dropout(out, p) + residual
        else:
            res = out + residual
        ret = self.layer_norm(res)
        return ret
Example #26
0
 def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
     """
     Apply ``self.transform`` to the tensor form of ``src`` and wrap the result.

     Args:
       src: input sequence
     Returns:
       the transformed sequence; its last element is also stored as the single
       entry of ``self._final_states``.
     """
     transformed = ExpressionSequence(
         expr_tensor=self.transform(src.as_tensor()))
     self._final_states = [FinalTransducerState(transformed[-1])]
     return transformed
Example #27
0
 def __call__(self, output):
   """
   Return ``output`` as an ExpressionSequence, wrapping it (as ``expr_list``)
   only when it is not one already.
   """
   if isinstance(output, ExpressionSequence):
     return output
   return ExpressionSequence(expr_list=output)
Example #28
0
    def transduce(self, es: ExpressionSequence) -> ExpressionSequence:
        """
    Run the stacked forward/backward layers over the input, reducing the time
    axis between layers, and return the concatenated forward and backward
    outputs of the last layer.

    Args:
      es: an ExpressionSequence
    Returns:
      ExpressionSequence whose entries are ``concatenate([forward, backward])``
      of the last layer. ``self._final_states`` is set to one
      FinalTransducerState per layer.
    Raises:
      ValueError: for "concat" downsampling when the current sequence length is
                  not a multiple of the layer's reduce factor.
      RuntimeError: if ``self.downsampling_method`` is neither "skip" nor "concat".
    """
        layer_inputs = [es]
        top_layer_i = len(self.builder_layers) - 1

        for layer_i, (fwd_builder,
                      bwd_builder) in enumerate(self.builder_layers):
            reduce_factor = self._reduce_factor_for_layer(layer_i)

            in_mask = layer_inputs[0].mask
            mask_out = None if in_mask is None else in_mask.lin_subsampled(
                reduce_factor)

            if self.downsampling_method == "concat" and len(
                    layer_inputs[0]) % reduce_factor != 0:
                raise ValueError(
                    f"For 'concat' subsampling, sequence lengths must be multiples of the total reduce factor, "
                    f"but got sequence length={len(layer_inputs[0])} for reduce_factor={reduce_factor}. "
                    f"Set Batcher's pad_src_to_multiple argument accordingly.")

            fs = fwd_builder.transduce(layer_inputs)
            bs = bwd_builder.transduce(
                [ReversedExpressionSequence(item) for item in layer_inputs])

            if layer_i >= top_layer_i:
                # last layer: pair each forward output with the time-aligned
                # backward output and concatenate them
                ret_es = ExpressionSequence(expr_list=[
                    dy.concatenate([f, b])
                    for f, b in zip(fs, ReversedExpressionSequence(bs))
                ],
                                            mask=mask_out)
                continue

            if self.downsampling_method == "skip":
                # keep every reduce_factor-th step of each direction
                layer_inputs = [
                    ExpressionSequence(expr_list=fs[::reduce_factor],
                                       mask=mask_out),
                    ExpressionSequence(expr_list=bs[::reduce_factor][::-1],
                                       mask=mask_out)
                ]
            elif self.downsampling_method == "concat":
                # split each direction into reduce_factor interleaved streams
                seq_len = len(layer_inputs[0])
                fwd_streams = []
                bwd_streams = []
                for start in range(0, seq_len, reduce_factor):
                    if start == 0:
                        fwd_streams = [[] for _ in range(reduce_factor)]
                        bwd_streams = [[] for _ in range(reduce_factor)]
                    bwd_base = seq_len - reduce_factor - start
                    for j in range(reduce_factor):
                        fwd_streams[j].append(fs[start + j])
                        bwd_streams[j].append(bs[bwd_base + j])
                next_fwd = [
                    ExpressionSequence(expr_list=fwd_streams[j], mask=mask_out)
                    for j in range(reduce_factor)
                ]
                next_bwd = [
                    ExpressionSequence(expr_list=bwd_streams[j], mask=mask_out)
                    for j in range(reduce_factor)
                ]
                layer_inputs = next_fwd + next_bwd
            else:
                raise RuntimeError(
                    f"unknown downsampling_method {self.downsampling_method}"
                )

        # one final state per layer: forward and backward halves concatenated
        final_states = []
        for fwd_builder, bwd_builder in self.builder_layers:
            main = dy.concatenate([
                fwd_builder.get_final_states()[0].main_expr(),
                bwd_builder.get_final_states()[0].main_expr()
            ])
            cell = dy.concatenate([
                fwd_builder.get_final_states()[0].cell_expr(),
                bwd_builder.get_final_states()[0].cell_expr()
            ])
            final_states.append(FinalTransducerState(main, cell))
        self._final_states = final_states

        return ret_es
Example #29
0
 def __call__(self, sent):
     """
     Run ``self.builder`` over ``sent`` and return its output as an
     ExpressionSequence (wrapping a plain list result as ``expr_list``).

     Also copies the builder's final states into ``self._final_states``.
     """
     raw = self.builder.transduce(sent)
     if isinstance(raw, ExpressionSequence):
         wrapped = raw
     else:
         wrapped = ExpressionSequence(expr_list=raw)
     self._final_states = self.builder.get_final_states()
     return wrapped
Example #30
0
 def embed_sent(self, sent_len):
   """
   Return the first ``sent_len`` columns of the embedding parameter as an
   unmasked ExpressionSequence.

   Args:
     sent_len: number of columns to select
   """
   table = dy.parameter(self.embeddings)
   strides = [1, 1]
   range_from = [0, 0]
   range_to = [self.emb_dim, sent_len]
   selected = dy.strided_select(table, strides, range_from, range_to)
   return ExpressionSequence(expr_tensor=selected, mask=None)