def transduce(self, es): """ returns the list of output Expressions obtained by adding the given inputs to the current state, one by one. Args: es: a list of Expression see also add_inputs(xs), including for explanation of differences between add_inputs and this function. """ es = self.builder_layers[0](es) self._final_states = [self.builder_layers[0].get_final_states()[0]] if len(self.builder_layers) == 1: return es for l in self.builder_layers[1:]: es = ExpressionSequence(expr_list=self._sum_lists(l(es), es)) self._final_states.append(FinalTransducerState(es[-1], l.get_final_states()[0].cell_expr())) last_output = self.builder_layers[-1](es) if self.add_to_output: self._final_states.append(FinalTransducerState(last_output[-1], self.builder_layers[-1].get_final_states()[0].cell_expr())) return ExpressionSequence(expr_list=self._sum_lists(last_output, es)) else: self._final_states.append(self.builder_layers[-1].get_final_states()[0]) return last_output
def transduce(self, embed_sent):
  src = embed_sent.as_tensor()

  sent_len = src.dim()[0][1]
  batch_size = src.dim()[1]
  pad_size = (self.window_receptor - 1) // 2  # integer division; TODO adapt it also for even window sizes

  # Zero-pad both ends so the first convolution preserves the sentence length.
  src = dy.concatenate([dy.zeroes((self.input_dim, pad_size), batch_size=batch_size),
                        src,
                        dy.zeroes((self.input_dim, pad_size), batch_size=batch_size)],
                       d=1)
  padded_sent_len = sent_len + 2 * pad_size

  conv1 = dy.parameter(self.pConv1)
  bias1 = dy.parameter(self.pBias1)
  src_chn = dy.reshape(src, (self.input_dim, padded_sent_len, 1), batch_size=batch_size)
  cnn_layer1 = dy.conv2d_bias(src_chn, conv1, bias1, stride=[1, 1])
  hidden_layer = dy.reshape(cnn_layer1, (self.internal_dim, sent_len, 1), batch_size=batch_size)

  # String comparison must use '==', not 'is'; 'linear' leaves the layer unchanged.
  if self.non_linearity == 'tanh':
    hidden_layer = dy.tanh(hidden_layer)
  elif self.non_linearity == 'relu':
    hidden_layer = dy.rectify(hidden_layer)
  elif self.non_linearity == 'sigmoid':
    hidden_layer = dy.logistic(hidden_layer)

  for conv_hid, bias_hid in self.builder_layers:
    hidden_layer = dy.conv2d_bias(hidden_layer, dy.parameter(conv_hid), dy.parameter(bias_hid), stride=[1, 1])
    hidden_layer = dy.reshape(hidden_layer, (self.internal_dim, sent_len, 1), batch_size=batch_size)
    if self.non_linearity == 'tanh':
      hidden_layer = dy.tanh(hidden_layer)
    elif self.non_linearity == 'relu':
      hidden_layer = dy.rectify(hidden_layer)
    elif self.non_linearity == 'sigmoid':
      hidden_layer = dy.logistic(hidden_layer)

  last_conv = dy.parameter(self.last_conv)
  last_bias = dy.parameter(self.last_bias)
  output = dy.conv2d_bias(hidden_layer, last_conv, last_bias, stride=[1, 1])
  output = dy.reshape(output, (sent_len, self.output_dim), batch_size=batch_size)
  output_seq = ExpressionSequence(expr_tensor=output)
  self._final_states = [FinalTransducerState(output_seq[-1])]
  return output_seq
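# Illustration (not part of the original code): a minimal numpy sketch of the
# padding arithmetic above. With pad_size = (window - 1) // 2 on each side, a
# width-`window` "valid" convolution returns exactly sent_len outputs, which is
# why the reshape to (internal_dim, sent_len, 1) is valid. Demo names are
# hypothetical.
def _demo_same_length_conv(sent_len=7, window=3):
  import numpy as np
  pad_size = (window - 1) // 2              # same formula as above (odd windows)
  x = np.random.rand(sent_len)
  x_padded = np.pad(x, pad_size)            # zero-pad both ends
  out_len = len(x_padded) - window + 1      # valid-convolution output length
  assert out_len == sent_len
  return np.convolve(x_padded, np.ones(window), mode='valid')  # shape (sent_len,)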
def __call__(self, es):
  mask = es.mask

  # first layer
  forward_es = self.forward_layers[0](es)
  rev_backward_es = self.backward_layers[0](ReversedExpressionSequence(es))

  for layer_i in range(1, len(self.forward_layers)):
    new_forward_es = self.forward_layers[layer_i]([forward_es, ReversedExpressionSequence(rev_backward_es)])
    rev_backward_es = ExpressionSequence(
        self.backward_layers[layer_i]([ReversedExpressionSequence(forward_es), rev_backward_es]).as_list(),
        mask=mask)
    forward_es = new_forward_es

  self._final_states = [
      FinalTransducerState(dy.concatenate([self.forward_layers[layer_i].get_final_states()[0].main_expr(),
                                           self.backward_layers[layer_i].get_final_states()[0].main_expr()]),
                           dy.concatenate([self.forward_layers[layer_i].get_final_states()[0].cell_expr(),
                                           self.backward_layers[layer_i].get_final_states()[0].cell_expr()]))
      for layer_i in range(len(self.forward_layers))]

  return ExpressionSequence(expr_list=[dy.concatenate([forward_es[i], rev_backward_es[-i - 1]])
                                       for i in range(len(forward_es))],
                            mask=mask)
def __call__(self, expr_seq):
  """
  transduce the sequence, applying masks if given (masked timesteps simply copy previous h / c)

  Args:
    expr_seq: expression sequence or list of expression sequences (where each inner list will be concatenated)
  Returns:
    expression sequence
  """
  if isinstance(expr_seq, ExpressionSequence):
    expr_seq = [expr_seq]
  batch_size = expr_seq[0][0].dim()[1]
  seq_len = len(expr_seq[0])

  if self.dropout_rate > 0.0 and self.train:
    self.set_dropout_masks(batch_size=batch_size)

  cur_input = expr_seq
  self._final_states = []
  for layer_i in range(self.num_layers):
    h = [dy.zeroes(dim=(self.hidden_dim,), batch_size=batch_size)]
    c = [dy.zeroes(dim=(self.hidden_dim,), batch_size=batch_size)]
    for pos_i in range(seq_len):
      x_t = [cur_input[j][pos_i] for j in range(len(cur_input))]
      if isinstance(x_t, dy.Expression):
        x_t = [x_t]
      elif type(x_t) != list:
        x_t = list(x_t)
      if self.dropout_rate > 0.0 and self.train:
        # apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights)
        gates_t = dy.vanilla_lstm_gates_dropout_concat(
            x_t, h[-1], self.Wx[layer_i], self.Wh[layer_i], self.b[layer_i],
            self.dropout_mask_x[layer_i], self.dropout_mask_h[layer_i],
            self.weightnoise_std if self.train else 0.0)
      else:
        gates_t = dy.vanilla_lstm_gates_concat(
            x_t, h[-1], self.Wx[layer_i], self.Wh[layer_i], self.b[layer_i],
            self.weightnoise_std if self.train else 0.0)
      c_t = dy.vanilla_lstm_c(c[-1], gates_t)
      h_t = dy.vanilla_lstm_h(c_t, gates_t)
      if expr_seq[0].mask is None or np.isclose(np.sum(expr_seq[0].mask.np_arr[:, pos_i:pos_i + 1]), 0.0):
        c.append(c_t)
        h.append(h_t)
      else:
        # masked timesteps copy the previous state instead of updating it
        c.append(expr_seq[0].mask.cmult_by_timestep_expr(c_t, pos_i, True)
                 + expr_seq[0].mask.cmult_by_timestep_expr(c[-1], pos_i, False))
        h.append(expr_seq[0].mask.cmult_by_timestep_expr(h_t, pos_i, True)
                 + expr_seq[0].mask.cmult_by_timestep_expr(h[-1], pos_i, False))
    self._final_states.append(FinalTransducerState(h[-1], c[-1]))
    cur_input = [h[1:]]

  return ExpressionSequence(expr_list=h[1:], mask=expr_seq[0].mask)
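# Illustration (not part of the original code): a minimal numpy sketch of the
# tied-weights ("variational") dropout from https://arxiv.org/abs/1512.05287
# applied above: one mask per layer is sampled once and reused at every
# timestep, rather than being resampled per step. Demo names are hypothetical.
def _demo_variational_dropout(seq_len=5, dim=4, rate=0.3, seed=0):
  import numpy as np
  rng = np.random.default_rng(seed)
  seq = [rng.standard_normal(dim) for _ in range(seq_len)]
  # sample one mask for the whole sequence, scaled by 1/(1-rate) to keep expectations
  mask = rng.binomial(1, 1.0 - rate, size=dim) / (1.0 - rate)
  return [x * mask for x in seq]  # the same mask is applied at every timestep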
def transduce(self, expr_seq: ExpressionSequence) -> ExpressionSequence:
  """
  transduce the sequence

  Args:
    expr_seq: expression sequence or list of expression sequences (where each inner list will be concatenated)
  Returns:
    expression sequence
  """
  Wq, Wk, Wv, Wo = [dy.parameter(p) for p in (self.pWq, self.pWk, self.pWv, self.pWo)]
  bq, bk, bv, bo = [dy.parameter(p) for p in (self.pbq, self.pbk, self.pbv, self.pbo)]

  # Start with a [(length, model_size) x batch] tensor
  x = expr_seq.as_transposed_tensor()
  x_len = x.dim()[0][0]
  x_batch = x.dim()[1]

  # Get the query, key, and value vectors
  # TODO: do we need bias broadcasting in DyNet?
  # q = dy.affine_transform([bq, x, Wq])
  # k = dy.affine_transform([bk, x, Wk])
  # v = dy.affine_transform([bv, x, Wv])
  q = bq + x * Wq
  k = bk + x * Wk
  v = bv + x * Wv

  # Split into a [(length, head_dim) x batch * num_heads] tensor
  q, k, v = [dy.reshape(e, (x_len, self.head_dim), batch_size=x_batch * self.num_heads) for e in (q, k, v)]

  # Scaled dot product [(length, length) x batch * num_heads]; rows are queries, columns are keys
  attn_score = q * dy.transpose(k) / sqrt(self.head_dim)
  if expr_seq.mask is not None:
    # push masked key columns toward -inf so they receive ~0 attention weight
    mask = dy.inputTensor(np.repeat(expr_seq.mask.np_arr, self.num_heads, axis=0).transpose(),
                          batched=True) * -1e10
    attn_score = attn_score + mask
  attn_prob = dy.softmax(attn_score, d=1)

  # Reduce using attention and resize to match [(length, model_size) x batch]
  o = dy.reshape(attn_prob * v, (x_len, self.input_dim), batch_size=x_batch)
  # Final transformation
  # o = dy.affine_transform([bo, attn_prob * v, Wo])
  o = bo + o * Wo

  expr_seq = ExpressionSequence(expr_transposed_tensor=o, mask=expr_seq.mask)
  self._final_states = [FinalTransducerState(expr_seq[-1], None)]
  return expr_seq
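# Illustration (not part of the original code): a minimal numpy sketch of the
# scaled dot-product attention computed above for a single head, with the
# softmax taken over keys (matching dy.softmax(attn_score, d=1)). Masking, as
# above, would add -1e10 to masked key columns before the softmax. Demo names
# are hypothetical.
def _demo_scaled_dot_attention(length=4, head_dim=8, seed=0):
  import numpy as np
  rng = np.random.default_rng(seed)
  q = rng.standard_normal((length, head_dim))
  k = rng.standard_normal((length, head_dim))
  v = rng.standard_normal((length, head_dim))
  scores = q @ k.T / np.sqrt(head_dim)          # rows: queries, cols: keys
  weights = np.exp(scores - scores.max(axis=1, keepdims=True))
  weights /= weights.sum(axis=1, keepdims=True) # row-wise softmax over keys
  return weights @ v                            # (length, head_dim)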
def transduce(self, es):
  forward_e = self.forward_layer(es)
  backward_e = self.backward_layer(ReversedExpressionSequence(es))
  self._final_states = [
      FinalTransducerState(dy.concatenate([self.forward_layer.get_final_states()[0].main_expr(),
                                           self.backward_layer.get_final_states()[0].main_expr()]),
                           dy.concatenate([self.forward_layer.get_final_states()[0].cell_expr(),
                                           self.backward_layer.get_final_states()[0].cell_expr()]))]
  output = self.residual_network.transduce(
      ExpressionSequence(expr_list=[dy.concatenate([f, b])
                                    for f, b in zip(forward_e, ReversedExpressionSequence(backward_e))]))
  self._final_states += self.residual_network.get_final_states()
  return output
def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
  sent_len = len(src)
  # select the first sent_len position embeddings
  embeddings = dy.strided_select(dy.parameter(self.embedder), [1, 1], [0, 0], [self.input_dim, sent_len])
  if self.op == 'sum':
    output = embeddings + src.as_tensor()
  elif self.op == 'concat':
    output = dy.concatenate([embeddings, src.as_tensor()])
  else:
    raise ValueError(f'Illegal op {self.op} in PositionalTransducer (options are "sum"/"concat")')
  output_seq = ExpressionSequence(expr_tensor=output, mask=src.mask)
  self._final_states = [FinalTransducerState(output_seq[-1])]
  return output_seq
def __call__(self, es): """ returns the list of output Expressions obtained by adding the given inputs to the current state, one by one, to both the forward and backward RNNs, and concatenating. :param es: an ExpressionSequence """ es_list = [es] for layer_i, (fb, bb) in enumerate(self.builder_layers): reduce_factor = self._reduce_factor_for_layer(layer_i) if self.downsampling_method=="concat" and len(es_list[0]) % reduce_factor != 0: raise ValueError("For 'concat' subsampling, sequence lengths must be multiples of the total reduce factor. Configure batcher accordingly.") fs = fb(es_list) bs = bb([ReversedExpressionSequence(es_item) for es_item in es_list]) if layer_i < len(self.builder_layers) - 1: if self.downsampling_method=="skip": es_list = [ExpressionSequence(expr_list=fs[::reduce_factor]), ExpressionSequence(expr_list=bs[::reduce_factor][::-1])] elif self.downsampling_method=="concat": es_len = len(es_list[0]) es_list_fwd = [] es_list_bwd = [] for i in range(0, es_len, reduce_factor): for j in range(reduce_factor): if i==0: es_list_fwd.append([]) es_list_bwd.append([]) es_list_fwd[j].append(fs[i+j]) es_list_bwd[j].append(bs[len(es_list[0])-reduce_factor+j-i]) es_list = [ExpressionSequence(expr_list=es_list_fwd[j]) for j in range(reduce_factor)] + [ExpressionSequence(expr_list=es_list_bwd[j]) for j in range(reduce_factor)] else: raise RuntimeError("unknown downsampling_method %s" % self.downsampling_method) else: # concat final outputs ret_es = ExpressionSequence(expr_list=[dy.concatenate([f, b]) for f, b in zip(fs, ReversedExpressionSequence(bs))]) self._final_states = [FinalTransducerState(dy.concatenate([fb.get_final_states()[0].main_expr(), bb.get_final_states()[0].main_expr()]), dy.concatenate([fb.get_final_states()[0].cell_expr(), bb.get_final_states()[0].cell_expr()])) \ for (fb, bb) in self.builder_layers] return ret_es
def transduce(self, embed_sent):
  src = embed_sent.as_tensor()

  W = dy.parameter(self.pW)
  b = dy.parameter(self.pb)
  l1 = dy.affine_transform([b, W, src])

  # String comparison must use '==', not 'is'; 'linear' leaves l1 unchanged.
  output = l1
  if self.nonlinearity == 'sigmoid':
    output = dy.logistic(l1)
  elif self.nonlinearity == 'tanh':
    # was 2 * dy.logistic(l1) - 1, which computes tanh(l1 / 2) rather than tanh(l1)
    output = dy.tanh(l1)
  elif self.nonlinearity == 'relu':
    output = dy.rectify(l1)

  output_seq = ExpressionSequence(expr_tensor=output)
  self._final_states = [FinalTransducerState(output_seq[-1])]
  return output_seq
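# Illustration (not part of the original code): a quick numpy check of the
# identity behind the tanh fix above. Since tanh(x) == 2*sigmoid(2x) - 1, the
# previous expression 2*sigmoid(x) - 1 computed tanh(x/2), not tanh(x).
def _demo_tanh_identity():
  import numpy as np
  x = np.linspace(-3, 3, 7)
  sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
  assert np.allclose(2 * sigmoid(2 * x) - 1, np.tanh(x))      # the true identity
  assert np.allclose(2 * sigmoid(x) - 1, np.tanh(x / 2))      # what the old code computed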
def __call__(self, es): """ returns the list of output Expressions obtained by adding the given inputs to the current state, one by one, to both the forward and backward RNNs, and concatenating. :param es: an ExpressionSequence """ es_list = [es] zero_pad = None batch_size = es_list[0][0].dim()[1] for layer_i, (fb, bb) in enumerate(self.builder_layers): reduce_factor = self._reduce_factor_for_layer(layer_i) while self.downsampling_method == "concat" and len( es_list[0]) % reduce_factor != 0: for es_i in range(len(es_list)): expr_list = es_list[es_i].as_list() if zero_pad is None or zero_pad.dim( )[0][0] != expr_list[0].dim()[0][0]: zero_pad = dy.zeros(dim=expr_list[0].dim()[0][0], batch_size=batch_size) expr_list.append(zero_pad) es_list[es_i] = ExpressionSequence(expr_list=expr_list) fs = fb(es_list) bs = bb( [ReversedExpressionSequence(es_item) for es_item in es_list]) if layer_i < len(self.builder_layers) - 1: if self.downsampling_method == "skip": es_list = [ ExpressionSequence(expr_list=fs[::reduce_factor]), ExpressionSequence(expr_list=bs[::reduce_factor][::-1]) ] elif self.downsampling_method == "concat": es_len = len(es_list[0]) es_list_fwd = [] es_list_bwd = [] for i in range(0, es_len, reduce_factor): for j in range(reduce_factor): if i == 0: es_list_fwd.append([]) es_list_bwd.append([]) es_list_fwd[j].append(fs[i + j]) es_list_bwd[j].append(bs[len(es_list[0]) - reduce_factor + j - i]) es_list = [ ExpressionSequence(expr_list=es_list_fwd[j]) for j in range(reduce_factor) ] + [ ExpressionSequence(expr_list=es_list_bwd[j]) for j in range(reduce_factor) ] else: raise RuntimeError("unknown downsampling_method %s" % self.downsampling_method) else: # concat final outputs ret_es = ExpressionSequence(expr_list=[ dy.concatenate([f, b]) for f, b in zip(fs, ReversedExpressionSequence(bs)) ]) self._final_states = [FinalTransducerState(dy.concatenate([fb.get_final_states()[0].main_expr(), bb.get_final_states()[0].main_expr()]), dy.concatenate([fb.get_final_states()[0].cell_expr(), bb.get_final_states()[0].cell_expr()])) \ for (fb, bb) in self.builder_layers] return ret_es
def transduce(self, es: ExpressionSequence) -> ExpressionSequence:
  """
  returns the list of output Expressions obtained by adding the given inputs
  to the current state, one by one, to both the forward and backward RNNs,
  and concatenating.

  Args:
    es: an ExpressionSequence
  """
  es_list = [es]

  for layer_i, (fb, bb) in enumerate(self.builder_layers):
    reduce_factor = self._reduce_factor_for_layer(layer_i)
    if es_list[0].mask is None:
      mask_out = None
    else:
      mask_out = es_list[0].mask.lin_subsampled(reduce_factor)
    if self.downsampling_method == "concat" and len(es_list[0]) % reduce_factor != 0:
      raise ValueError(f"For 'concat' subsampling, sequence lengths must be multiples of the total reduce factor, "
                       f"but got sequence length={len(es_list[0])} for reduce_factor={reduce_factor}. "
                       f"Set Batcher's pad_src_to_multiple argument accordingly.")
    fs = fb.transduce(es_list)
    bs = bb.transduce([ReversedExpressionSequence(es_item) for es_item in es_list])
    if layer_i < len(self.builder_layers) - 1:
      if self.downsampling_method == "skip":
        es_list = [ExpressionSequence(expr_list=fs[::reduce_factor], mask=mask_out),
                   ExpressionSequence(expr_list=bs[::reduce_factor][::-1], mask=mask_out)]
      elif self.downsampling_method == "concat":
        es_len = len(es_list[0])
        es_list_fwd = []
        es_list_bwd = []
        for i in range(0, es_len, reduce_factor):
          for j in range(reduce_factor):
            if i == 0:
              es_list_fwd.append([])
              es_list_bwd.append([])
            es_list_fwd[j].append(fs[i + j])
            es_list_bwd[j].append(bs[len(es_list[0]) - reduce_factor + j - i])
        es_list = [ExpressionSequence(expr_list=es_list_fwd[j], mask=mask_out) for j in range(reduce_factor)] + \
                  [ExpressionSequence(expr_list=es_list_bwd[j], mask=mask_out) for j in range(reduce_factor)]
      else:
        raise RuntimeError(f"unknown downsampling_method {self.downsampling_method}")
    else:
      # concat final outputs
      ret_es = ExpressionSequence(expr_list=[dy.concatenate([f, b])
                                             for f, b in zip(fs, ReversedExpressionSequence(bs))],
                                  mask=mask_out)

  self._final_states = [
      FinalTransducerState(dy.concatenate([fb.get_final_states()[0].main_expr(),
                                           bb.get_final_states()[0].main_expr()]),
                           dy.concatenate([fb.get_final_states()[0].cell_expr(),
                                           bb.get_final_states()[0].cell_expr()]))
      for (fb, bb) in self.builder_layers]
  return ret_es
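# Illustration (not part of the original code): a plain-Python sketch of the two
# downsampling modes used by the pyramidal transducers above, on a toy sequence.
# "skip" keeps every reduce_factor-th step; "concat" glues reduce_factor
# neighbouring steps together, which is why the sequence length must divide
# evenly in that mode. Demo names are hypothetical.
def _demo_downsampling(seq=(0, 1, 2, 3, 4, 5), reduce_factor=2):
  seq = list(seq)
  skip = seq[::reduce_factor]                   # e.g. [0, 2, 4]
  assert len(seq) % reduce_factor == 0          # 'concat' requires an even split
  concat = [tuple(seq[i:i + reduce_factor])     # e.g. [(0, 1), (2, 3), (4, 5)]
            for i in range(0, len(seq), reduce_factor)]
  return skip, concat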
def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
  output = self.transform(src.as_tensor())
  output_seq = ExpressionSequence(expr_tensor=output)
  self._final_states = [FinalTransducerState(output_seq[-1])]
  return output_seq
def __call__(self, es, transitions):
  mask = es.mask
  # force every transition sequence to end with a shift followed by a reduce,
  # so each batch element produces a single root node
  transitions = [t + [0, 1] for t in transitions]
  transitions = np.array(transitions)

  Wl = dy.parameter(self.p_Wl)
  Wr = dy.parameter(self.p_Wr)
  b = dy.parameter(self.p_b)

  batch_size = len(transitions)
  ha = []
  self.hfinals = []
  self.cfinals = []
  for i in range(batch_size):
    hstack = []
    cstack = []
    htmp = []
    hfinal_state = None
    cfinal_state = None
    count = 0
    for j in range(len(transitions[i])):
      if transitions[i][j] == 0:
        # shift: push the next input embedding for batch element i onto the stack
        e1 = dy.reshape(es[count], (batch_size, self.hidden_dim))[i]
        count += 1
        hstack.append(e1)
        cstack.append(e1)
      elif transitions[i][j] == 1:
        # reduce: pop the two topmost nodes and compose them with a tree-LSTM cell
        h1 = hstack.pop()
        h2 = hstack.pop()
        c1 = cstack.pop()
        c2 = cstack.pop()
        tmp = dy.affine_transform([b, Wl, h1, Wr, h2])
        i_gate = dy.pick_range(tmp, 0, self.hidden_dim)
        fl_gate = dy.pick_range(tmp, self.hidden_dim, self.hidden_dim * 2)
        fr_gate = dy.pick_range(tmp, self.hidden_dim * 2, self.hidden_dim * 3)
        o_gate = dy.pick_range(tmp, self.hidden_dim * 3, self.hidden_dim * 4)
        cell_inp = dy.pick_range(tmp, self.hidden_dim * 4, self.hidden_dim * 5)
        # gates use sigmoids, the candidate cell input uses tanh
        # (these two activations were swapped in the original)
        i_gate = dy.logistic(i_gate)
        fl_gate = dy.logistic(fl_gate)
        fr_gate = dy.logistic(fr_gate)
        o_gate = dy.logistic(o_gate)
        cell_inp = dy.tanh(cell_inp)
        c_t = dy.cmult(fl_gate, c1) + dy.cmult(fr_gate, c2) + dy.cmult(i_gate, cell_inp)
        h_t = dy.cmult(o_gate, dy.tanh(c_t))
        cstack.append(c_t)
        hstack.append(h_t)
        htmp.append(h_t)
        hfinal_state = h_t
        cfinal_state = c_t
      else:
        # padding transition: emit a zero vector to keep positions aligned
        htmp.append(dy.zeros(self.hidden_dim))
    self.hfinals.append(hfinal_state)
    self.cfinals.append(cfinal_state)
    ha.append(htmp)

  self._final_states = [FinalTransducerState(dy.concatenate_to_batch(self.hfinals),
                                             dy.concatenate_to_batch(self.cfinals))]

  # re-batch the per-sentence outputs: one expression per composition step
  ha = list(zip_longest(*ha))
  hh = [list(x) for x in ha]
  k = [dy.reshape(dy.concatenate(xx), (xx[0].dim()[0][0], len(xx))) for xx in hh]
  return ExpressionSequence(expr_list=k)
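# Illustration (not part of the original code): the transition scheme above is
# ordinary shift-reduce over a stack. 0 shifts the next leaf onto the stack,
# 1 pops two nodes and pushes their composition; the appended [0, 1] guarantees
# at least one reduce so exactly one root remains. This toy version composes
# with tuples instead of tree-LSTM cells. Demo names are hypothetical.
def _demo_shift_reduce(leaves=('a', 'b', 'c'), transitions=(0, 0, 1, 0, 1)):
  stack, nxt = [], 0
  for t in transitions:
    if t == 0:                       # shift
      stack.append(leaves[nxt])
      nxt += 1
    elif t == 1:                     # reduce: combine the two topmost entries
      right, left = stack.pop(), stack.pop()
      stack.append((left, right))
  assert len(stack) == 1             # a well-formed sequence leaves one root
  return stack[0]                    # (('a', 'b'), 'c') for the defaults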
def __call__(self, embed_sent):
  batch_size = embed_sent[0].dim()[1]

  # Softmax + segment decision
  encodings = self.embed_encoder(embed_sent)
  if self.learn_segmentation:
    segment_decisions, segment_logsoftmaxes = self.sample_segmentation(encodings, batch_size)
  else:
    segment_decisions, segment_logsoftmaxes = self.sample_segmentation(encodings, batch_size, self._src)

  # Some checks
  assert len(encodings) == len(segment_decisions), \
      "Encoding={}, segment={}".format(len(encodings), len(segment_decisions))
  # The last segment decision should be equal to 1
  if len(segment_decisions) > 0:
    segment_decisions[-1] = numpy.ones(segment_decisions[-1].shape, dtype=int)

  # Buffers for output
  buffers = [[] for _ in range(batch_size)]
  outputs = [[] for _ in range(batch_size)]
  last_segment = [-1 for _ in range(batch_size)]
  length_prior = [0 for _ in range(batch_size)]
  length_div = [0 for _ in range(batch_size)]
  self.segment_transducer.set_input_size(batch_size, len(encodings))

  # Loop through all the frames (word / item) in the input.
  for j, (encoding, segment_decision) in enumerate(six.moves.zip(encodings, segment_decisions)):
    # For each decision in the batch
    for i, decision in enumerate(segment_decision):
      decision = int(decision)
      if decision == SegmentingAction.DELETE.value:
        continue
      # Get the particular encoding for that batch item
      encoding_i = dy.pick_batch_elem(encoding, i)
      # Append the encoding for this item to the buffer
      buffers[i].append(encoding_i)
      if decision == SegmentingAction.SEGMENT.value:
        # Close the current segment and transduce it
        expr_seq = expression_sequence.ExpressionSequence(expr_list=buffers[i])
        transduce_output = self.segment_transducer.transduce(expr_seq)
        outputs[i].append(transduce_output)
        buffers[i] = []
        # Accumulate the Poisson length prior for this segment
        length_prior[i] += numpy.log(poisson.pmf(j - last_segment[i], self.length_prior) + 1e-10)
        length_div[i] += 1
        last_segment[i] = j
        self.segment_transducer.next_item()
  length_prior = list(six.moves.map(lambda i: length_prior[i] / length_div[i], range(len(length_prior))))

  # Padding: bring all batch items up to the maximum number of segments
  max_col = max(len(xs) for xs in outputs)
  P0 = dy.vecInput(self.segment_transducer.encoder.hidden_dim)

  def pad(xs):
    deficit = max_col - len(xs)
    if deficit > 0:
      xs.extend([P0 for _ in range(deficit)])
    return xs

  outputs = dy.concatenate_to_batch(list(six.moves.map(lambda xs: dy.concatenate_cols(pad(xs)), outputs)))
  self.segment_decisions = segment_decisions
  self.segment_logsoftmaxes = segment_logsoftmaxes

  # Packing output together
  if self.train and self.learn_segmentation:
    self.segment_length_prior = dy.inputTensor(length_prior, batched=True)
    if self.use_baseline:
      self.bs = list(six.moves.map(lambda x: self.baseline(dy.nobackprop(x)), encodings))
  if not self.train:
    self.set_report_input(segment_decisions)
  self._final_encoder_state = [FinalTransducerState(encodings[-1])]
  # Return the encoded batch by the size of [(encode, segment)] * batch_size
  return expression_sequence.ExpressionSequence(expr_tensor=outputs)
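# Illustration (not part of the original code): a minimal sketch of the segment
# length prior accumulated above, assuming scipy is available. Each segment of
# length L contributes log(Poisson(L; length_prior) + 1e-10), and the sum is
# divided by the number of segments (length_div above). Demo names are
# hypothetical.
def _demo_length_prior(segment_lengths=(2, 3, 1), length_prior=2.0):
  import numpy as np
  from scipy.stats import poisson
  log_terms = [np.log(poisson.pmf(l, length_prior) + 1e-10)
               for l in segment_lengths]
  return sum(log_terms) / len(log_terms)  # averaged over segments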