# Imports assumed by the methods in this excerpt; ExpressionSequence,
# FinalTransducerState, UniLSTMState, expression_seqs and transducers are
# provided by the surrounding XNMT codebase and are not re-imported here.
from typing import Sequence, Tuple, Union

import dynet as dy
import numpy as np


def __call__(self, expr_seq):
  """
  transduce the sequence, applying masks if given (masked timesteps simply copy previous h / c)

  Args:
    expr_seq: expression sequence or list of expression sequences (where each inner list will be concatenated)
  Returns:
    expression sequence
  """
  if isinstance(expr_seq, ExpressionSequence):
    expr_seq = [expr_seq]
  batch_size = expr_seq[0][0].dim()[1]
  seq_len = len(expr_seq[0])

  if self.dropout_rate > 0.0 and self.train:
    self.set_dropout_masks(batch_size=batch_size)

  cur_input = expr_seq
  self._final_states = []
  for layer_i in range(self.num_layers):
    h = [dy.zeroes(dim=(self.hidden_dim,), batch_size=batch_size)]
    c = [dy.zeroes(dim=(self.hidden_dim,), batch_size=batch_size)]
    for pos_i in range(seq_len):
      # collect this timestep's input from each input sequence; the fused gate
      # kernel below concatenates them
      x_t = [cur_input[j][pos_i] for j in range(len(cur_input))]
      if isinstance(x_t, dy.Expression):
        x_t = [x_t]
      elif type(x_t) != list:
        x_t = list(x_t)
      if self.dropout_rate > 0.0 and self.train:
        # apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights)
        gates_t = dy.vanilla_lstm_gates_dropout_concat(
          x_t, h[-1], self.Wx[layer_i], self.Wh[layer_i], self.b[layer_i],
          self.dropout_mask_x[layer_i], self.dropout_mask_h[layer_i],
          self.weightnoise_std if self.train else 0.0)
      else:
        gates_t = dy.vanilla_lstm_gates_concat(
          x_t, h[-1], self.Wx[layer_i], self.Wh[layer_i], self.b[layer_i],
          self.weightnoise_std if self.train else 0.0)
      c_t = dy.vanilla_lstm_c(c[-1], gates_t)
      h_t = dy.vanilla_lstm_h(c_t, gates_t)
      if expr_seq[0].mask is None or np.isclose(np.sum(expr_seq[0].mask.np_arr[:, pos_i:pos_i + 1]), 0.0):
        # no batch element is masked at this timestep: take the new state as-is
        c.append(c_t)
        h.append(h_t)
      else:
        # blend: keep the new state for unmasked elements, copy the previous
        # state through for masked (padded) elements
        c.append(expr_seq[0].mask.cmult_by_timestep_expr(c_t, pos_i, True)
                 + expr_seq[0].mask.cmult_by_timestep_expr(c[-1], pos_i, False))
        h.append(expr_seq[0].mask.cmult_by_timestep_expr(h_t, pos_i, True)
                 + expr_seq[0].mask.cmult_by_timestep_expr(h[-1], pos_i, False))
    self._final_states.append(FinalTransducerState(h[-1], c[-1]))
    # the hidden states of this layer become the input to the next layer
    cur_input = [h[1:]]

  return ExpressionSequence(expr_list=h[1:], mask=expr_seq[0].mask)
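# Minimal sketch (not part of the source) of what the cmult_by_timestep_expr
# blend above achieves, written with plain DyNet ops. The helper name
# `copy_through` and the (batch,)-shaped `mask_col` argument are assumptions
# made for illustration; the convention matches the code above, where a mask
# value of 1.0 marks a padded position.
def copy_through(prev_state: dy.Expression, new_state: dy.Expression,
                 mask_col: np.ndarray) -> dy.Expression:
  """Keep new_state for real tokens, copy prev_state through for padded ones.

  Args:
    prev_state, new_state: batched expressions of dim ((hidden_dim,), batch)
    mask_col: shape (batch,); 1.0 = padded at this timestep, 0.0 = real token
  """
  keep_new = dy.inputTensor((1.0 - mask_col).reshape(1, -1), batched=True)  # ((1,), batch)
  keep_prev = dy.inputTensor(mask_col.reshape(1, -1), batched=True)
  # per-batch-element scalar gates broadcast over the hidden dimension
  return dy.cmult(new_state, keep_new) + dy.cmult(prev_state, keep_prev)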
def add_input_to_prev(self, prev_state: UniLSTMState, x: Union[dy.Expression, Sequence[dy.Expression]]) \
        -> Tuple[Sequence[dy.Expression], Sequence[dy.Expression]]:
  if isinstance(x, dy.Expression):
    x = [x]
  elif type(x) != list:
    x = list(x)

  if self.dropout_rate > 0.0 and self.train and self.dropout_mask_x is None:
    self.set_dropout_masks()

  new_c, new_h = [], []
  for layer_i in range(self.num_layers):
    if self.dropout_rate > 0.0 and self.train:
      # apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights)
      gates = dy.vanilla_lstm_gates_dropout_concat(
        x, prev_state._h[layer_i], self.Wx[layer_i], self.Wh[layer_i], self.b[layer_i],
        self.dropout_mask_x[layer_i], self.dropout_mask_h[layer_i],
        self.weightnoise_std if self.train else 0.0)
    else:
      gates = dy.vanilla_lstm_gates_concat(
        x, prev_state._h[layer_i], self.Wx[layer_i], self.Wh[layer_i], self.b[layer_i],
        self.weightnoise_std if self.train else 0.0)
    new_c.append(dy.vanilla_lstm_c(prev_state._c[layer_i], gates))
    new_h.append(dy.vanilla_lstm_h(new_c[-1], gates))
    # this layer's hidden state is the next layer's input
    x = [new_h[-1]]
  return new_c, new_h
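# Usage sketch (not part of the source): stepping the LSTM one input at a time,
# as a decoder would. Only add_input_to_prev above is taken from the source;
# `lstm`, `initial_state()` and direct assignment to the state's _c/_h fields
# are assumptions about the surrounding class, made for illustration.
def step_through(lstm, inputs: Sequence[dy.Expression]):
  state = lstm.initial_state()  # assumed to return a fresh UniLSTMState
  for x in inputs:
    new_c, new_h = lstm.add_input_to_prev(state, x)
    state._c, state._h = new_c, new_h  # advance using the fields read above
    yield new_h[-1]                    # top layer's hidden state for this step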
def transduce(self, expr_seq: 'expression_seqs.ExpressionSequence') -> 'expression_seqs.ExpressionSequence':
  """
  transduce the sequence, applying masks if given (masked timesteps simply copy previous h / c)

  Args:
    expr_seq: expression sequence or list of expression sequences (where each inner list will be concatenated)
  Returns:
    expression sequence
  """
  if isinstance(expr_seq, expression_seqs.ExpressionSequence):
    expr_seq = [expr_seq]
  batch_size = expr_seq[0].batch_size()
  seq_len = expr_seq[0].sent_len()

  if self.dropout_rate > 0.0 and self.train:
    self.set_dropout_masks(batch_size=batch_size)

  cur_input = expr_seq
  self._final_states = []
  for layer_i in range(self.num_layers):
    h = [dy.zeroes(dim=(self.hidden_dim,), batch_size=batch_size)]
    c = [dy.zeroes(dim=(self.hidden_dim,), batch_size=batch_size)]
    for pos_i in range(seq_len):
      # collect this timestep's input from each input sequence; the fused gate
      # kernel below concatenates them
      x_t = [cur_input[j][pos_i] for j in range(len(cur_input))]
      if isinstance(x_t, dy.Expression):
        x_t = [x_t]
      elif type(x_t) != list:
        x_t = list(x_t)
      # sanity-check the concatenated input dimension expected by this layer
      found_dim = sum([x_t_i.dim()[0][0] for x_t_i in x_t])
      expected_dim = self.total_input_dim if layer_i == 0 else self.hidden_dim
      if found_dim != expected_dim:
        raise ValueError(f"VanillaLSTMGates: x_t has inconsistent dimension {found_dim}, "
                         f"expecting {expected_dim}")
      if self.dropout_rate > 0.0 and self.train:
        # apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights)
        gates_t = dy.vanilla_lstm_gates_dropout_concat(
          x_t, h[-1], self.Wx[layer_i], self.Wh[layer_i], self.b[layer_i],
          self.dropout_mask_x[layer_i], self.dropout_mask_h[layer_i],
          self.weightnoise_std if self.train else 0.0)
      else:
        gates_t = dy.vanilla_lstm_gates_concat(
          x_t, h[-1], self.Wx[layer_i], self.Wh[layer_i], self.b[layer_i],
          self.weightnoise_std if self.train else 0.0)
      c_t = dy.vanilla_lstm_c(c[-1], gates_t)
      h_t = dy.vanilla_lstm_h(c_t, gates_t)
      if expr_seq[0].mask is None or np.isclose(np.sum(expr_seq[0].mask.np_arr[:, pos_i:pos_i + 1]), 0.0):
        # no batch element is masked at this timestep: take the new state as-is
        c.append(c_t)
        h.append(h_t)
      else:
        # blend: keep the new state for unmasked elements, copy the previous
        # state through for masked (padded) elements
        c.append(expr_seq[0].mask.cmult_by_timestep_expr(c_t, pos_i, True)
                 + expr_seq[0].mask.cmult_by_timestep_expr(c[-1], pos_i, False))
        h.append(expr_seq[0].mask.cmult_by_timestep_expr(h_t, pos_i, True)
                 + expr_seq[0].mask.cmult_by_timestep_expr(h[-1], pos_i, False))
    self._final_states.append(transducers.FinalTransducerState(h[-1], c[-1]))
    # the hidden states of this layer become the input to the next layer
    cur_input = [h[1:]]

  return expression_seqs.ExpressionSequence(expr_list=h[1:], mask=expr_seq[0].mask)
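# Reference sketch (not part of the source): the computation that
# dy.vanilla_lstm_gates_concat / vanilla_lstm_c / vanilla_lstm_h fuse, spelled
# out with unfused DyNet ops. The gate order [i; f; o; g] and the +1 forget-gate
# bias follow DyNet's vanilla-LSTM convention as I understand it; verify against
# the DyNet kernels if exactness matters. Dropout and weight noise are omitted,
# and all parameters are taken as plain expressions so the sketch stands alone.
def unfused_lstm_step(x: dy.Expression, h_prev: dy.Expression, c_prev: dy.Expression,
                      Wx: dy.Expression, Wh: dy.Expression, b: dy.Expression,
                      hidden_dim: int) -> Tuple[dy.Expression, dy.Expression]:
  gates = Wx * x + Wh * h_prev + b  # ((4 * hidden_dim,), batch)
  i = dy.logistic(dy.pick_range(gates, 0, hidden_dim))
  f = dy.logistic(dy.pick_range(gates, hidden_dim, 2 * hidden_dim) + 1.0)
  o = dy.logistic(dy.pick_range(gates, 2 * hidden_dim, 3 * hidden_dim))
  g = dy.tanh(dy.pick_range(gates, 3 * hidden_dim, 4 * hidden_dim))
  c_t = dy.cmult(f, c_prev) + dy.cmult(i, g)  # what vanilla_lstm_c computes
  h_t = dy.cmult(o, dy.tanh(c_t))             # what vanilla_lstm_h computes
  return h_t, c_t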