Example 1
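    # Assumed context (not shown in the snippet): import dynet as dy, import numpy as np,
    # plus ExpressionSequence and FinalTransducerState from the surrounding library;
    # self.Wx / self.Wh / self.b are per-layer DyNet LSTM parameters.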
    def __call__(self, expr_seq):
        """
    transduce the sequence, applying masks if given (masked timesteps simply copy previous h / c)

    Args:
      expr_seq: expression sequence or list of expression sequences (where each inner list will be concatenated)
    Returns:
      expression sequence
    """
        if isinstance(expr_seq, ExpressionSequence):
            expr_seq = [expr_seq]
        batch_size = expr_seq[0][0].dim()[1]
        seq_len = len(expr_seq[0])

        if self.dropout_rate > 0.0 and self.train:
            self.set_dropout_masks(batch_size=batch_size)

        cur_input = expr_seq
        self._final_states = []
        for layer_i in range(self.num_layers):
            h = [dy.zeroes(dim=(self.hidden_dim, ), batch_size=batch_size)]
            c = [dy.zeroes(dim=(self.hidden_dim, ), batch_size=batch_size)]
            for pos_i in range(seq_len):
                # gather this timestep's input from every input sequence; the gates
                # op below concatenates the list along the feature dimension
                x_t = [cur_input[j][pos_i] for j in range(len(cur_input))]
                if self.dropout_rate > 0.0 and self.train:
                    # apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights)
                    gates_t = dy.vanilla_lstm_gates_dropout_concat(
                        x_t, h[-1], self.Wx[layer_i], self.Wh[layer_i],
                        self.b[layer_i], self.dropout_mask_x[layer_i],
                        self.dropout_mask_h[layer_i],
                        self.weightnoise_std if self.train else 0.0)
                else:
                    gates_t = dy.vanilla_lstm_gates_concat(
                        x_t, h[-1], self.Wx[layer_i], self.Wh[layer_i],
                        self.b[layer_i],
                        self.weightnoise_std if self.train else 0.0)
                c_t = dy.vanilla_lstm_c(c[-1], gates_t)
                h_t = dy.vanilla_lstm_h(c_t, gates_t)
                if expr_seq[0].mask is None or np.isclose(
                        np.sum(expr_seq[0].mask.np_arr[:, pos_i:pos_i + 1]), 0.0):
                    # no padding anywhere in the batch at this position: take the new state
                    c.append(c_t)
                    h.append(h_t)
                else:
                    # blend: real tokens take the new state, padded tokens copy the previous one
                    mask = expr_seq[0].mask
                    c.append(mask.cmult_by_timestep_expr(c_t, pos_i, True) +
                             mask.cmult_by_timestep_expr(c[-1], pos_i, False))
                    h.append(mask.cmult_by_timestep_expr(h_t, pos_i, True) +
                             mask.cmult_by_timestep_expr(h[-1], pos_i, False))
            self._final_states.append(FinalTransducerState(h[-1], c[-1]))
            cur_input = [h[1:]]

        return ExpressionSequence(expr_list=h[1:], mask=expr_seq[0].mask)
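
The dropout branch above calls `set_dropout_masks`, which the snippet does not show. Following the variational-dropout recipe of arXiv:1512.05287 that the in-code comment cites, a minimal sketch could sample one Bernoulli retention mask per layer and reuse it at every timestep; the attribute names mirror the snippet, but the `dy.random_bernoulli` usage and exact shapes here are assumptions, not the library's verified implementation:

    def set_dropout_masks(self, batch_size: int = 1) -> None:
        # Variational dropout sketch: one mask per sequence, shared across all
        # timesteps (rather than a fresh mask per position), with inverted-dropout
        # rescaling so no scaling is needed at test time.
        keep_prob = 1.0 - self.dropout_rate
        scale = 1.0 / keep_prob
        # layer 0 sees the (possibly concatenated) input; higher layers see hidden_dim
        self.dropout_mask_x = [
            dy.random_bernoulli((self.total_input_dim if layer_i == 0 else self.hidden_dim,),
                                keep_prob, scale, batch_size=batch_size)
            for layer_i in range(self.num_layers)]
        self.dropout_mask_h = [
            dy.random_bernoulli((self.hidden_dim,), keep_prob, scale, batch_size=batch_size)
            for layer_i in range(self.num_layers)]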
Example 2
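    # Assumed context (not shown): from typing import Sequence, Tuple, Union; import dynet as dy;
    # UniLSTMState carries the per-layer hidden/cell lists (_h, _c) from the previous step.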
    def add_input_to_prev(self, prev_state: UniLSTMState, x: Union[dy.Expression, Sequence[dy.Expression]]) \
            -> Tuple[Sequence[dy.Expression], Sequence[dy.Expression]]:
        if isinstance(x, dy.Expression):
            x = [x]
        elif not isinstance(x, list):
            x = list(x)

        if self.dropout_rate > 0.0 and self.train and self.dropout_mask_x is None:
            self.set_dropout_masks()

        new_c, new_h = [], []
        for layer_i in range(self.num_layers):
            if self.dropout_rate > 0.0 and self.train:
                # apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights)
                gates = dy.vanilla_lstm_gates_dropout_concat(
                    x, prev_state._h[layer_i], self.Wx[layer_i],
                    self.Wh[layer_i], self.b[layer_i],
                    self.dropout_mask_x[layer_i], self.dropout_mask_h[layer_i],
                    self.weightnoise_std if self.train else 0.0)
            else:
                gates = dy.vanilla_lstm_gates_concat(
                    x, prev_state._h[layer_i], self.Wx[layer_i],
                    self.Wh[layer_i], self.b[layer_i],
                    self.weightnoise_std if self.train else 0.0)
            new_c.append(dy.vanilla_lstm_c(prev_state._c[layer_i], gates))
            new_h.append(dy.vanilla_lstm_h(new_c[-1], gates))
            x = [new_h[-1]]

        return new_c, new_h
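
For orientation, a stepwise decoding loop might drive this method roughly as sketched below; the `lstm` and `embeddings` names and the `UniLSTMState(...)` constructor arguments are hypothetical stand-ins, not taken from the snippet:

    # Hypothetical usage sketch: step the LSTM one token at a time.
    state = lstm.initial_state()                      # assumed helper returning a UniLSTMState
    for emb in embeddings:                            # one dy.Expression per input token
        new_c, new_h = lstm.add_input_to_prev(state, emb)
        state = UniLSTMState(lstm, c=new_c, h=new_h)  # assumed constructor signature
        top_h = new_h[-1]                             # top-layer hidden state for this step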
Example 3
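    # Assumed context (not shown): import dynet as dy, import numpy as np, and the
    # library's expression_seqs and transducers modules.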
    def transduce(
        self, expr_seq: 'expression_seqs.ExpressionSequence'
    ) -> 'expression_seqs.ExpressionSequence':
        """
    transduce the sequence, applying masks if given (masked timesteps simply copy previous h / c)

    Args:
      expr_seq: expression sequence or list of expression sequences (where each inner list will be concatenated)
    Returns:
      expression sequence
    """
        if isinstance(expr_seq, expression_seqs.ExpressionSequence):
            expr_seq = [expr_seq]
        batch_size = expr_seq[0].batch_size()
        seq_len = expr_seq[0].sent_len()

        if self.dropout_rate > 0.0 and self.train:
            self.set_dropout_masks(batch_size=batch_size)

        cur_input = expr_seq
        self._final_states = []
        for layer_i in range(self.num_layers):
            h = [dy.zeroes(dim=(self.hidden_dim, ), batch_size=batch_size)]
            c = [dy.zeroes(dim=(self.hidden_dim, ), batch_size=batch_size)]
            for pos_i in range(seq_len):
                # gather this timestep's input from every input sequence; the gates
                # op below concatenates the list along the feature dimension
                x_t = [cur_input[j][pos_i] for j in range(len(cur_input))]
                found_dim = sum(x_t_i.dim()[0][0] for x_t_i in x_t)
                expected_dim = self.total_input_dim if layer_i == 0 else self.hidden_dim
                if found_dim != expected_dim:
                    raise ValueError(
                        f"VanillaLSTMGates: x_t has inconsistent dimension {found_dim}, "
                        f"expecting {expected_dim}")
                if self.dropout_rate > 0.0 and self.train:
                    # apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights)
                    gates_t = dy.vanilla_lstm_gates_dropout_concat(
                        x_t, h[-1], self.Wx[layer_i], self.Wh[layer_i],
                        self.b[layer_i], self.dropout_mask_x[layer_i],
                        self.dropout_mask_h[layer_i],
                        self.weightnoise_std if self.train else 0.0)
                else:
                    gates_t = dy.vanilla_lstm_gates_concat(
                        x_t, h[-1], self.Wx[layer_i], self.Wh[layer_i],
                        self.b[layer_i],
                        self.weightnoise_std if self.train else 0.0)
                c_t = dy.vanilla_lstm_c(c[-1], gates_t)
                h_t = dy.vanilla_lstm_h(c_t, gates_t)
                if expr_seq[0].mask is None or np.isclose(
                        np.sum(expr_seq[0].mask.np_arr[:, pos_i:pos_i + 1]), 0.0):
                    # no padding anywhere in the batch at this position: take the new state
                    c.append(c_t)
                    h.append(h_t)
                else:
                    # blend: real tokens take the new state, padded tokens copy the previous one
                    mask = expr_seq[0].mask
                    c.append(mask.cmult_by_timestep_expr(c_t, pos_i, True) +
                             mask.cmult_by_timestep_expr(c[-1], pos_i, False))
                    h.append(mask.cmult_by_timestep_expr(h_t, pos_i, True) +
                             mask.cmult_by_timestep_expr(h[-1], pos_i, False))
            self._final_states.append(
                transducers.FinalTransducerState(h[-1], c[-1]))
            cur_input = [h[1:]]

        return expression_seqs.ExpressionSequence(expr_list=h[1:],
                                                  mask=expr_seq[0].mask)
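
A minimal call-site sketch, assuming a built transducer and dummy per-timestep inputs; `input_dim` and `transducer` are placeholders, while `ExpressionSequence(expr_list=...)` matches the constructor used in the return statement above:

    # Hypothetical usage sketch for the transducer above.
    dy.renew_cg()
    embs = [dy.inputVector([0.0] * input_dim) for _ in range(5)]  # 5 dummy timesteps
    seq = expression_seqs.ExpressionSequence(expr_list=embs)      # no mask: no padding
    out = transducer.transduce(seq)       # ExpressionSequence of top-layer h states
    final = transducer._final_states[-1]  # FinalTransducerState of the last layer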