def embed_sent(self, sent_len: numbers.Integral) -> expression_seqs.ExpressionSequence:
  """Produce positional embeddings for a sentence of the given length.

  Slices the first ``sent_len`` columns out of the stored embedding
  parameter and wraps them as an expression sequence without a mask.

  Args:
    sent_len: number of positions to embed.

  Returns:
    Expression sequence holding a (emb_dim x sent_len) tensor, mask-free.
  """
  emb_param = dy.parameter(self.embeddings)
  sliced = dy.strided_select(emb_param, [1, 1], [0, 0], [self.emb_dim, sent_len])
  return expression_seqs.ExpressionSequence(expr_tensor=sliced, mask=None)
def __call__(self, expr):
  """Crop and subsample an expression along its first two dimensions.

  Applies a strided selection that trims ``self.margin`` entries off both
  ends of the first two dimensions and steps through them with
  ``self.stride``; the third dimension and the batch dimension are kept
  whole.  Assumes ``expr`` has a 3-dimensional tensor shape (per-batch) —
  confirm against callers.
  """
  dims, batch = expr.dim()
  row_margin, col_margin = self.margin[0], self.margin[1]
  row_stride, col_stride = self.stride[0], self.stride[1]
  bounds = [
    row_margin, dims[0] - row_margin, row_stride,
    col_margin, dims[1] - col_margin, col_stride,
    0, dims[2], 1,
    0, batch, 1,
  ]
  return dy.strided_select(expr, bounds)
def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
  """Combine positional embeddings with the input sequence.

  Slices position embeddings matching the input length, then either adds
  them to the input ('sum') or stacks them on top of it ('concat').

  Args:
    src: input expression sequence.

  Returns:
    Expression sequence of the combined tensor, carrying src's mask.

  Raises:
    ValueError: if ``self.op`` is neither 'sum' nor 'concat'.
  """
  sent_len = len(src)
  embeddings = dy.strided_select(dy.parameter(self.embedder), [1, 1], [0, 0],
                                 [self.input_dim, sent_len])
  if self.op == 'sum':
    output = embeddings + src.as_tensor()
  elif self.op == 'concat':
    output = dy.concatenate([embeddings, src.as_tensor()])
  else:
    # BUG FIX: the message previously interpolated the unbound local name
    # 'op' (NameError) instead of 'self.op', masking the intended ValueError.
    raise ValueError(f'Illegal op {self.op} in PositionalTransducer (options are "sum"/"concat")')
  output_seq = ExpressionSequence(expr_tensor=output, mask=src.mask)
  # Final state is the last timestep of the combined output (no cell state).
  self._final_states = [FinalTransducerState(output_seq[-1])]
  return output_seq
def transduce(self, expr_seq: expression_seqs.ExpressionSequence) -> expression_seqs.ExpressionSequence:
  """
  Transduce the sequence, applying masks if given (masked timesteps simply copy previous h / c).

  Performs a quasi-recurrent-style pass: a single strided convolution
  projects the whole input to forget/output/update gates, then a cheap
  elementwise recurrence runs over the (possibly stride-reduced) timesteps.

  Args:
    expr_seq: expression sequence (will be accessed via tensor_expr)
  Return:
    expression sequence
  """
  if isinstance(expr_seq, list):
    # Multiple input streams: concatenate their tensors along the feature
    # axis before reshaping into conv2d layout (seq_len, 1, input_dim).
    mask_out = expr_seq[0].mask
    seq_len = len(expr_seq[0])
    batch_size = expr_seq[0].dim()[1]
    tensors = [e.as_tensor() for e in expr_seq]
    input_tensor = dy.reshape(dy.concatenate(tensors), (seq_len, 1, self.input_dim), batch_size=batch_size)
  else:
    mask_out = expr_seq.mask
    seq_len = len(expr_seq)
    batch_size = expr_seq.dim()[1]
    # Transpose so time is the leading dimension expected by conv2d.
    input_tensor = dy.reshape(dy.transpose(expr_seq.as_tensor()), (seq_len, 1, self.input_dim), batch_size=batch_size)
  if self.dropout > 0.0 and self.train:
    input_tensor = dy.dropout(input_tensor, self.dropout)
  # One convolution computes all three gates (f, o, z) for every timestep;
  # filter p_f outputs hidden_dim*3 channels, bias p_b added per channel.
  proj_inp = dy.conv2d_bias(input_tensor, dy.parameter(self.p_f), dy.parameter(self.p_b),
                            stride=(self.stride, 1), is_valid=False)
  # A stride > 1 shortens the sequence; read the resulting length back.
  reduced_seq_len = proj_inp.dim()[0][0]
  proj_inp = dy.transpose(dy.reshape(proj_inp, (reduced_seq_len, self.hidden_dim * 3), batch_size=batch_size))
  # proj_inp dims: (hidden, 1, seq_len), batch_size
  if self.stride > 1 and mask_out is not None:
    # Resample the mask to match the stride-reduced sequence length.
    mask_out = mask_out.lin_subsampled(trg_len=reduced_seq_len)
  # h[0] / c[0] are zero initial states; real outputs start at index 1.
  h = [dy.zeroes(dim=(self.hidden_dim, 1), batch_size=batch_size)]
  c = [dy.zeroes(dim=(self.hidden_dim, 1), batch_size=batch_size)]
  for t in range(reduced_seq_len):
    # Slice this timestep's gate pre-activations out of the projected block:
    # rows [0, hidden) -> forget gate, [hidden, 2*hidden) -> output gate,
    # [2*hidden, 3*hidden) -> candidate update.
    f_t = dy.logistic(dy.strided_select(proj_inp, [], [0, t], [self.hidden_dim, t + 1]))
    o_t = dy.logistic(dy.strided_select(proj_inp, [], [self.hidden_dim, t], [self.hidden_dim * 2, t + 1]))
    z_t = dy.tanh(dy.strided_select(proj_inp, [], [self.hidden_dim * 2, t], [self.hidden_dim * 3, t + 1]))
    if self.dropout > 0.0 and self.train:
      # Zoneout-style recurrent dropout: randomly force forget-gate entries
      # to 1 so the corresponding cell values are carried over unchanged.
      retention_rate = 1.0 - self.dropout
      dropout_mask = dy.random_bernoulli((self.hidden_dim, 1), retention_rate, batch_size=batch_size)
      f_t = 1.0 - dy.cmult(dropout_mask, 1.0 - f_t)  # TODO: would be easy to make a zoneout dynet operation to save memory
    # Coupled input gate: i = 1 - f (QRNN "fo-pool" style update).
    i_t = 1.0 - f_t
    if t == 0:
      c_t = dy.cmult(i_t, z_t)
    else:
      c_t = dy.cmult(f_t, c[-1]) + dy.cmult(i_t, z_t)
    h_t = dy.cmult(o_t, c_t)  # note: LSTM would use dy.tanh(c_t) instead of c_t
    if mask_out is None or np.isclose(np.sum(mask_out.np_arr[:, t:t + 1]), 0.0):
      # No masked positions at this timestep -> plain append, skip blending.
      # (presumably mask_out.np_arr is 1 at masked positions — confirm against Mask impl)
      c.append(c_t)
      h.append(h_t)
    else:
      # Masked positions copy the previous state; unmasked take the new one.
      c.append(mask_out.cmult_by_timestep_expr(c_t, t, True)
               + mask_out.cmult_by_timestep_expr(c[-1], t, False))
      h.append(mask_out.cmult_by_timestep_expr(h_t, t, True)
               + mask_out.cmult_by_timestep_expr(h[-1], t, False))
  # Expose final (h, c) flattened to vectors for downstream decoders.
  self._final_states = [transducers.FinalTransducerState(dy.reshape(h[-1], (self.hidden_dim,), batch_size=batch_size), \
                                                         dy.reshape(c[-1], (self.hidden_dim,), batch_size=batch_size))]
  return expression_seqs.ExpressionSequence(expr_list=h[1:], mask=mask_out)
def embed_sent(self, sent_len):
  """Return the first ``sent_len`` positional embeddings as a sequence.

  Selects columns [0, sent_len) of the embedding parameter matrix and
  returns them wrapped in an unmasked expression sequence.
  """
  emb_matrix = dy.parameter(self.embeddings)
  selected = dy.strided_select(emb_matrix, [1, 1], [0, 0], [self.emb_dim, sent_len])
  return ExpressionSequence(expr_tensor=selected, mask=None)