def transduce(self, embed_sent):
    """Run the zero-padded convolution stack over an embedded sentence.

    The input tensor is padded with zero columns on both sides of
    dimension 1, passed through the first convolution, every
    ``(filter, bias)`` pair in ``self.builder_layers`` and a final
    convolution. After each non-final convolution the result is reshaped
    to ``(internal_dim, sent_len, 1)`` and the configured non-linearity
    is applied.

    Args:
        embed_sent: object exposing ``as_tensor()``; the code reads the
            sentence length from dimension 1 of that tensor.

    Returns:
        An ``ExpressionSequence`` wrapping the reshaped output of the last
        convolution. ``self._final_states`` is set from its last element.
    """
    src = embed_sent.as_tensor()
    sent_len = src.dim()[0][1]
    batch_size = src.dim()[1]

    # Floor division: a plain "/" yields a float on Python 3, which is not a
    # valid tensor dimension.
    pad_size = (self.window_receptor - 1) // 2  # TODO adapt it also for even window size
    src = dy.concatenate([
        dy.zeroes((self.input_dim, pad_size), batch_size=batch_size),
        src,
        dy.zeroes((self.input_dim, pad_size), batch_size=batch_size),
    ], d=1)
    padded_sent_len = sent_len + 2 * pad_size

    # Select the activation by string *value*. Identity comparison ("is")
    # only works when both strings happen to be interned. 'linear' and any
    # unrecognised name leave the layer unchanged, as before.
    activation = {
        'tanh': dy.tanh,
        'relu': dy.rectify,
        'sigmoid': dy.logistic,
    }.get(self.non_linearity)

    def to_hidden(conv_out):
        """Reshape a convolution output to the hidden layout and activate it."""
        hidden = dy.reshape(conv_out, (self.internal_dim, sent_len, 1),
                            batch_size=batch_size)
        return hidden if activation is None else activation(hidden)

    conv1 = dy.parameter(self.pConv1)
    bias1 = dy.parameter(self.pBias1)
    src_chn = dy.reshape(src, (self.input_dim, padded_sent_len, 1),
                         batch_size=batch_size)
    hidden_layer = to_hidden(dy.conv2d_bias(src_chn, conv1, bias1, stride=[1, 1]))

    for conv_hid, bias_hid in self.builder_layers:
        hidden_layer = to_hidden(
            dy.conv2d_bias(hidden_layer, dy.parameter(conv_hid),
                           dy.parameter(bias_hid), stride=[1, 1]))

    last_conv = dy.parameter(self.last_conv)
    last_bias = dy.parameter(self.last_bias)
    output = dy.conv2d_bias(hidden_layer, last_conv, last_bias, stride=[1, 1])
    output = dy.reshape(output, (sent_len, self.output_dim), batch_size=batch_size)
    output_seq = ExpressionSequence(expr_tensor=output)
    self._final_states = [FinalTransducerState(output_seq[-1])]
    return output_seq
def build_graph(self, x):
    """Build the classifier graph: three parallel convolutions, pooling, linear output.

    Args:
        x: DyNet expression whose ``dim()`` unpacks as ``((n, d), batch)``.

    Returns:
        The DyNet expression ``dot_product(pool, W) + b``.
    """
    # (filter key, bias key, channel-count option key) for each branch.
    branch_keys = (
        ('conv_W_1', 'conv_b_1', 'channel_1'),
        ('conv_W_2', 'conv_b_2', 'channel_2'),
        ('conv_W_3', 'conv_b_3', 'channel_3'),
    )
    kernels = [
        (dy.parameter(self.params[w_key]), dy.parameter(self.params[b_key]))
        for w_key, b_key, _ in branch_keys
    ]
    out_weight = dy.parameter(self.params['W'])
    out_bias = dy.parameter(self.params['b'])

    (n, d), _ = x.dim()
    x = dy.reshape(x, (1, n, d))

    # One-dimensional convolutional network: each branch sees the same input.
    feature_maps = [
        dy.tanh(dy.conv2d_bias(x, kernel, bias, (1, 1), is_valid=False))
        for kernel, bias in kernels
    ]
    pooled = [
        dy.max_dim(dy.reshape(fmap, (n, self.options[chan_key])))
        for fmap, (_, _, chan_key) in zip(feature_maps, branch_keys)
    ]

    # Fully connected classification layer.
    pool = dy.concatenate(pooled, 0)
    return dy.dot_product(pool, out_weight) + out_bias
def do_one_batch(X_batch, Z_batch):
    """Compute the rectified similarity between a speech and a VGG embedding.

    The speech input goes through three convolution / ReLU / k-max-pooling
    stages and a final 1-max pooling, is weight-normalised, and is compared
    with an affine projection of the VGG input via a dot product.

    Args:
        X_batch: array-like with ``shape`` and ``flatten``; the speech features.
        Z_batch: array-like with ``shape`` and ``flatten``; the VGG features.

    Returns:
        DyNet expression ``rectify(transpose(zem_sp) * zem_vgg)``.
    """
    batch_size = X_batch.shape[0]
    if DO_BATCH:
        # Flatten the batch into 1-D vector for workaround
        X_batch_f = X_batch.flatten('F')
        Z_batch_f = Z_batch.flatten('F')
        x = dy.reshape(dy.inputVector(X_batch_f), (nmf, nframes),
                       batch_size=batch_size)
        # The target shape must be a tuple; "(nvgg)" is just the int nvgg.
        z = dy.reshape(dy.inputVector(Z_batch_f), (nvgg,),
                       batch_size=batch_size)
        scnn.add_input([X_batch[i] for i in range(batch_size)])
        vgg.add_input([Z_batch[i] for i in range(batch_size)])
    else:
        x = dy.matInput(X_batch.shape[0], X_batch.shape[1])
        x.set(X_batch.flatten('F'))
        z = dy.vecInput(Z_batch.shape[0])
        z.set(Z_batch.flatten('F'))

    # NOTE(review): this reshape derives its shape from X_batch.shape and
    # passes no batch_size, so it only matches the non-batched input; the
    # DO_BATCH path looks unfinished - confirm before enabling it.
    x = dy.reshape(dy.transpose(x, [1, 0]),
                   (1, X_batch.shape[1], X_batch.shape[0]))
    print(x.npvalue().shape)

    a_h1 = dy.conv2d_bias(x, w_i, b_i, [1, 1], is_valid=False)
    h1 = dy.rectify(a_h1)
    h1_pool = dy.kmax_pooling(h1, D[1], d=1)

    a_h2 = dy.conv2d_bias(h1_pool, w_h1, b_h1, [1, 1], is_valid=False)
    h2 = dy.rectify(a_h2)
    h2_pool = dy.kmax_pooling(h2, D[2], d=1)

    a_h3 = dy.conv2d_bias(h2_pool, w_h2, b_h2, [1, 1], is_valid=False)
    h3 = dy.rectify(a_h3)
    h3_pool = dy.kmax_pooling(h3, D[3], d=1)

    # Collapse dimension 1 to a single maximum, then flatten to a vector.
    h4 = dy.kmax_pooling(h3_pool, 1, d=1)
    h4_re = dy.reshape(h4, (J[3], ))

    g = dy.scalarInput(1.)
    zem_sp = dy.weight_norm(h4_re, g)
    zem_vgg = w_embed * z + b_embed

    sa = dy.transpose(zem_sp) * zem_vgg
    s = dy.rectify(sa)

    if PRINT_EMBED:
        print('Vgg embedding vector:', zem_vgg.npvalue().shape)
        print(zem_vgg.value())
        print('Speech embedding vector:', zem_sp.npvalue().shape)
        print(zem_sp.value())
    if PRINT_SIM:
        print('Raw Similarity:', sa.npvalue())
        print(sa.value())
        print('Similarity:', s.npvalue())
        print(s.value())

    return s
def apply(self, x_input):
    """Apply a gated convolution to ``x_input``.

    Two convolutions with separate kernels and biases are run on the same
    input; the result is ``tanh(conv_t) * logistic(conv_s)`` element-wise.

    Args:
        x_input: DyNet expression fed to both convolutions.

    Returns:
        DyNet expression holding the gated activation.
    """
    stride = (self.s_x, self.s_y)

    def convolve(kernel, bias):
        return dy.conv2d_bias(x_input, kernel.expr(), bias.expr(), stride,
                              is_valid=self.is_valid)

    gate_pre = convolve(self.kernel_s, self.bias_s)
    signal_pre = convolve(self.kernel_t, self.bias_t)
    return dy.cmult(dy.tanh(signal_pre), dy.logistic(gate_pre))
def __call__(self, x, dropout=False):
    """Compute output logits for one input.

    When ``args.conv`` is set, the input is reshaped to ``(28, 28, 1)`` and
    passed through two convolution / max-pool / ReLU stages before the
    dense layers; otherwise it goes to the dense layers directly.

    Args:
        x: DyNet input expression.
        dropout: if true, apply dropout with ``DROPOUT_RATE`` to the hidden layer.

    Returns:
        DyNet expression ``W2 * h`` (no softmax is applied).
    """
    def conv_stage(feat, kernel, bias):
        convolved = dy.conv2d_bias(feat, kernel, bias, [1, 1], is_valid=False)
        return dy.rectify(dy.maxpooling2d(convolved, [2, 2], [2, 2]))

    if args.conv:
        image = dy.reshape(x, (28, 28, 1))
        stage1 = conv_stage(image, self.F1, self.b1)
        stage2 = conv_stage(stage1, self.F2, self.b2)  # 7x7x64
        x = dy.reshape(stage2, (7 * 7 * 64,))

    hidden = dy.rectify(self.W1 * x + self.hbias)
    if dropout:
        hidden = dy.dropout(hidden, DROPOUT_RATE)
    return self.W2 * hidden
def __call__(self, x, dropout=False):
    """Map an input expression to class logits.

    With ``args.conv`` enabled the input is treated as a ``(28, 28, 1)``
    image and run through two convolution / 2x2 max-pool / ReLU blocks,
    then flattened. The dense hidden layer and output projection follow in
    either case.

    Args:
        x: DyNet input expression.
        dropout: whether to apply ``DROPOUT_RATE`` dropout to the hidden layer.

    Returns:
        DyNet expression with the unnormalised logits.
    """
    features = x
    if args.conv:
        features = dy.reshape(features, (28, 28, 1))
        features = dy.maxpooling2d(
            dy.conv2d_bias(features, self.F1, self.b1, [1, 1], is_valid=False),
            [2, 2], [2, 2])
        features = dy.rectify(features)
        features = dy.maxpooling2d(
            dy.conv2d_bias(features, self.F2, self.b2, [1, 1], is_valid=False),
            [2, 2], [2, 2])
        features = dy.rectify(features)
        # The feature map is 7x7x64 here; flatten it for the dense layer.
        features = dy.reshape(features, (7 * 7 * 64, ))

    pre_activation = self.W1 * features + self.hbias
    h = dy.rectify(pre_activation)
    if dropout:
        h = dy.dropout(h, DROPOUT_RATE)
    return self.W2 * h
def calc_predict_and_activations(wids, tag, words):
    """Print the prediction for one sentence with per-filter diagnostics.

    Runs the convolution / max-pool / ReLU / softmax-layer pipeline, then
    prints the tag and words, the rendered activations, the scores, the
    bias and each class's per-feature contributions.

    Args:
        wids: list of word ids; padded with 0 up to ``WIN_SIZE`` on a copy.
        tag: integer label, printed with ``%d``.
        words: list of word strings used for display.

    Returns:
        None. All output goes to stdout.
    """
    dy.renew_cg()
    # Pad a copy: "wids += ..." would lengthen the caller's list in place.
    if len(wids) < WIN_SIZE:
        wids = list(wids) + [0] * (WIN_SIZE - len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)

    # For every filter, the index along axis 0 of the reshaped map where it peaks.
    filters = (dy.reshape(cnn_out, (len(wids), FILTER_SIZE))).npvalue()
    activations = filters.argmax(axis=0)

    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE, ))
    pool_out = dy.rectify(pool_out)

    scores = (W_sm * pool_out + b_sm).npvalue()
    print('%d ||| %s' % (tag, ' '.join(words)))
    predict = np.argmax(scores)
    print(display_activations(words, activations))
    print('scores=%s, predict: %d' % (scores, predict))

    features = pool_out.npvalue()
    W = W_sm.npvalue()
    bias = b_sm.npvalue()
    print(' bias=%s' % bias)

    # Row i of the product is the per-feature contribution to class i.
    contributions = W * features
    row_formats = (
        ' very bad (%.4f): %s',
        ' bad (%.4f): %s',
        ' neutral (%.4f): %s',
        ' good (%.4f): %s',
        'very good (%.4f): %s',
    )
    for class_idx, row_format in enumerate(row_formats):
        print(row_format % (scores[class_idx], contributions[class_idx]))
def calc_scores(words):
    """Score one sentence for the main and the auxiliary task.

    Both heads share the same convolution / max-pool / ReLU features.

    Args:
        words: list of word ids; padded with 0 up to ``WIN_SIZE`` on a copy.

    Returns:
        Tuple ``(scores_main, scores_aux)`` of DyNet expressions.
    """
    dy.renew_cg()
    W_cnn_express = dy.parameter(W_cnn)
    b_cnn_express = dy.parameter(b_cnn)
    W_sm_express = dy.parameter(W_sm)
    b_sm_express = dy.parameter(b_sm)
    Waux_sm_express = dy.parameter(Waux_sm)
    baux_sm_express = dy.parameter(baux_sm)

    # The convolution needs at least WIN_SIZE positions, so short sentences
    # are padded with id 0. Pad a copy: "words += ..." would lengthen the
    # caller's list in place.
    if len(words) < WIN_SIZE:
        words = list(words) + [0] * (WIN_SIZE - len(words))

    # Convolution + pooling layer: one embedding column per word.
    cnn_in = dy.concatenate([W_emb[x] for x in words], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn_express, b_cnn_express,
                             stride=(1, 1), is_valid=False)
    pool_out = dy.max_dim(cnn_out, d=1)      # maximum over dimension 1
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)          # ReLU

    # Scores for either task.
    scores_main = W_sm_express * pool_out + b_sm_express
    scores_aux = Waux_sm_express * pool_out + baux_sm_express
    return scores_main, scores_aux
def calc_predict_and_activations(wids, tag, words):
    """Print the model's prediction and filter activations for one sentence.

    Args:
        wids: list of word ids; padded with 0 up to ``WIN_SIZE`` on a copy.
        tag: integer label, printed with ``%d``.
        words: list of word strings used for display.

    Returns:
        None. The scores, bias and per-class contributions are printed.
    """
    dy.renew_cg()
    # Build a padded copy instead of extending the caller's list in place.
    if len(wids) < WIN_SIZE:
        wids = list(wids) + [0] * (WIN_SIZE - len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)

    # Index along axis 0 of the reshaped map at which each filter is largest.
    filters = (dy.reshape(cnn_out, (len(wids), FILTER_SIZE))).npvalue()
    activations = filters.argmax(axis=0)

    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)

    scores = (W_sm * pool_out + b_sm).npvalue()
    print('%d ||| %s' % (tag, ' '.join(words)))
    predict = np.argmax(scores)
    print(display_activations(words, activations))
    print('scores=%s, predict: %d' % (scores, predict))

    features = pool_out.npvalue()
    W = W_sm.npvalue()
    bias = b_sm.npvalue()
    print(' bias=%s' % bias)

    # Row i of the product is the per-feature contribution to class i.
    contributions = W * features
    row_formats = (
        ' very bad (%.4f): %s',
        ' bad (%.4f): %s',
        ' neutral (%.4f): %s',
        ' good (%.4f): %s',
        'very good (%.4f): %s',
    )
    for class_idx, row_format in enumerate(row_formats):
        print(row_format % (scores[class_idx], contributions[class_idx]))
def transduce(self, es: expression_seqs.ExpressionSequence) -> expression_seqs.ExpressionSequence:
    """Run a forward and a backward convolutional LSTM over the sequence.

    For each direction the input-to-gate convolution is computed once for
    the whole sequence and sliced per position; the hidden-to-gate
    convolution is applied step by step. Forward and backward states for
    the same position are flattened and concatenated.

    Args:
        es: input sequence; read through ``as_transposed_tensor()`` and ``mask``.

    Returns:
        Expression sequence with one concatenated state per input position,
        carrying the input's mask unchanged.
    """
    mask = es.mask
    sent_len = len(es)
    es_expr = es.as_transposed_tensor()
    batch_size = es_expr.dim()[1]

    # Reshape to (time, frequency, channels) for the 2-D convolutions.
    es_chn = dy.reshape(es_expr, (sent_len, self.freq_dim, self.chn_dim), batch_size=batch_size)

    h_out = {}
    for direction in ["fwd", "bwd"]:
        # input convolutions
        gates_xt_bias = dy.conv2d_bias(es_chn,
                                       dy.parameter(self.params["x2all_" + direction]),
                                       dy.parameter(self.params["b_" + direction]),
                                       stride=(1, 1), is_valid=False)
        # One slice of the gate pre-activations per position.
        gates_xt_bias_list = [dy.pick_range(gates_xt_bias, i, i + 1) for i in range(sent_len)]
        h = []
        c = []
        for input_pos in range(sent_len):
            # The backward direction walks the positions from last to first.
            directional_pos = input_pos if direction == "fwd" else sent_len - input_pos - 1
            gates_t = gates_xt_bias_list[directional_pos]
            if input_pos > 0:
                # recurrent convolutions
                gates_h_t = dy.conv2d(h[-1],
                                      dy.parameter(self.params["h2all_" + direction]),
                                      stride=(1, 1), is_valid=False)
                gates_t += gates_h_t

            # standard LSTM logic
            if len(c) == 0:
                c_tm1 = dy.zeros((self.freq_dim * self.num_filters,), batch_size=batch_size)
            else:
                c_tm1 = c[-1]
            gates_t_reshaped = dy.reshape(gates_t, (4 * self.freq_dim * self.num_filters,), batch_size=batch_size)
            c_t = dy.reshape(dy.vanilla_lstm_c(c_tm1, gates_t_reshaped), (self.freq_dim * self.num_filters,), batch_size=batch_size)
            h_t = dy.vanilla_lstm_h(c_t, gates_t_reshaped)
            h_t = dy.reshape(h_t, (1, self.freq_dim, self.num_filters,), batch_size=batch_size)

            # If no mask entry is set for this step, keep the new states;
            # otherwise blend new and previous states through the mask.
            # NOTE(review): the mask column is indexed by input_pos even in
            # the "bwd" direction, where the processed position is
            # directional_pos - confirm this is intended.
            # NOTE(review): if step 0 takes the else branch, c[-1] / h[-1]
            # index empty lists - confirm the first step is never masked.
            if mask is None or np.isclose(np.sum(mask.np_arr[:, input_pos:input_pos + 1]), 0.0):
                c.append(c_t)
                h.append(h_t)
            else:
                c.append(
                    mask.cmult_by_timestep_expr(c_t, input_pos, True) + mask.cmult_by_timestep_expr(c[-1], input_pos, False))
                h.append(
                    mask.cmult_by_timestep_expr(h_t, input_pos, True) + mask.cmult_by_timestep_expr(h[-1], input_pos, False))

        h_out[direction] = h
    ret_expr = []
    for state_i in range(len(h_out["fwd"])):
        state_fwd = h_out["fwd"][state_i]
        # Backward states were collected in reverse order; index from the end.
        state_bwd = h_out["bwd"][-1 - state_i]
        # Flatten (1, freq, filters) to a vector of freq * filters entries.
        output_dim = (state_fwd.dim()[0][1] * state_fwd.dim()[0][2],)
        fwd_reshape = dy.reshape(state_fwd, output_dim, batch_size=batch_size)
        bwd_reshape = dy.reshape(state_bwd, output_dim, batch_size=batch_size)
        ret_expr.append(dy.concatenate([fwd_reshape, bwd_reshape], d=0 if self.reshape_output else 2))
    return expression_seqs.ExpressionSequence(expr_list=ret_expr, mask=mask)

    # TODO: implement get_final_states()
def conv(input_, _=None):
    """Convolve the input and apply the activation.

    :param input_: dy.Expression; documented upstream as ((1, T, dsz), B)
    :param _: ignored placeholder argument

    Returns: ``act`` applied to the biased convolution of ``input_``
    """
    return act(dy.conv2d_bias(input_, weight, bias, strides, is_valid=False))
def conv(input_, _=None):
    """Run the biased convolution over ``input_`` and activate the result.

    :param input_: dy.Expression; documented upstream as ((1, T, dsz), B)
    :param _: unused; accepted so the function can take a second argument

    Returns: dy.Expression produced by ``act`` on the convolution output
    """
    convolved = dy.conv2d_bias(input_, weight, bias, strides, is_valid=False)
    activated = act(convolved)
    return activated
def transduce(self, encodings):
    """Convolve the encodings with the n-gram filter and max-pool the result.

    Inputs with fewer than ``self.ngram_size`` columns are right-padded with
    zero columns first.

    Args:
        encodings: DyNet expression with at least two dimensions.

    Returns:
        DyNet expression: the rectified valid convolution, maximised over
        dimension 1 and then over dimension 0.
    """
    frames = encodings
    width = frames.dim()[0][1]
    if width < self.ngram_size:
        padding = dy.zeros((self.embed_dim, self.ngram_size - width))
        frames = dy.concatenate([frames, padding], d=1)

    shape = frames.dim()[0]
    # Swap the two axes' sizes and add a leading singleton dimension.
    stacked = dy.reshape(frames, (1, shape[1], shape[0]))

    kernel = dy.parameter(self.filter)
    bias = dy.parameter(self.bias)
    activated = dy.rectify(
        dy.conv2d_bias(stacked, kernel, bias, stride=(1, 1), is_valid=True))
    pooled_over_width = dy.max_dim(activated, d=1)
    return dy.max_dim(pooled_over_width, d=0)
def calc_scores(wids):
    """Compute class scores for one sentence with the CNN classifier.

    Args:
        wids: list of word ids; padded with 0 up to ``WIN_SIZE`` on a copy.

    Returns:
        DyNet expression ``W_sm * pool_out + b_sm``.
    """
    dy.renew_cg()
    # Pad a copy: "wids += ..." would lengthen the caller's list in place.
    if len(wids) < WIN_SIZE:
        wids = list(wids) + [0] * (WIN_SIZE - len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)

    # Maximum over dimension 1, flattened to FILTER_SIZE values, then ReLU.
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)
    return W_sm * pool_out + b_sm
def __call__(self, inputs, dropout=False):
    """Compute output logits for one input tensor.

    Two convolution / 2x2 max-pool / ReLU stages feed a dense hidden layer
    and a linear output layer.

    Args:
        inputs: array accepted by ``dy.inputTensor``.
        dropout: if true, apply ``DROPOUT_RATE`` dropout to the hidden layer.

    Returns:
        DyNet expression with the unnormalised output (no softmax).
    """
    def conv_pool_relu(feat, kernel, bias):
        convolved = dy.conv2d_bias(feat, kernel, bias, [1, 1], is_valid=False)
        return dy.rectify(dy.maxpooling2d(convolved, [2, 2], [2, 2]))

    feat = dy.inputTensor(inputs)

    kernel1 = dy.parameter(self.pConv1)
    bias1 = dy.parameter(self.pB1)
    feat = conv_pool_relu(feat, kernel1, bias1)

    kernel2 = dy.parameter(self.pConv2)
    bias2 = dy.parameter(self.pB2)
    feat = conv_pool_relu(feat, kernel2, bias2)

    flat = dy.reshape(feat, (7 * 7 * 64, 1))

    hidden_w = dy.parameter(self.pW1)
    hidden_b = dy.parameter(self.pB3)
    hidden = dy.rectify(hidden_w * flat + hidden_b)
    if dropout:
        hidden = dy.dropout(hidden, DROPOUT_RATE)

    out_w = dy.parameter(self.pW2)
    return out_w * hidden
def __call__(self, inputs, dropout=False):
    """Run the two-stage convolutional network and return its raw output.

    Args:
        inputs: array accepted by ``dy.inputTensor``.
        dropout: whether to apply ``DROPOUT_RATE`` dropout to the hidden layer.

    Returns:
        DyNet expression ``w2 * h``; softmax is left to the caller.
    """
    def stage(feat, kernel, bias):
        pooled = dy.maxpooling2d(
            dy.conv2d_bias(feat, kernel, bias, [1, 1], is_valid=False),
            [2, 2], [2, 2])
        return dy.rectify(pooled)

    stage1 = stage(dy.inputTensor(inputs),
                   dy.parameter(self.pConv1), dy.parameter(self.pB1))
    stage2 = stage(stage1,
                   dy.parameter(self.pConv2), dy.parameter(self.pB2))

    # Flatten to a single column for the dense layers.
    column = dy.reshape(stage2, (7 * 7 * 64, 1))
    w1 = dy.parameter(self.pW1)
    b3 = dy.parameter(self.pB3)
    h = dy.rectify(w1 * column + b3)
    if dropout:
        h = dy.dropout(h, DROPOUT_RATE)

    w2 = dy.parameter(self.pW2)
    return w2 * h
def calc_scores(wids):
    """Return the classifier scores for one sentence.

    Args:
        wids: list of word ids; padded with 0 up to ``WIN_SIZE`` on a copy.

    Returns:
        DyNet expression ``W_sm * pool_out + b_sm``.
    """
    dy.renew_cg()
    # Build a padded copy instead of extending the caller's list in place.
    if len(wids) < WIN_SIZE:
        wids = list(wids) + [0] * (WIN_SIZE - len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)

    # Maximum over dimension 1, flattened to FILTER_SIZE values, then ReLU.
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)
    return W_sm * pool_out + b_sm
def __convolve__(self, embeddings, F, b, W1, bW1):
    """Convolve a sentence's embeddings, pool over its length and project.

    Args:
        embeddings: list of DyNet expressions, joined as columns.
        F: convolution filter expression.
        b: convolution bias expression.
        W1: weight of the output projection.
        bW1: bias of the output projection.

    Returns:
        DyNet expression ``W1 * f + bW1``, where ``f`` is the flattened
        pooled feature map.
    """
    sntlen = len(embeddings)
    emb = dy.concatenate_cols(embeddings)
    x = dy.conv2d_bias(emb, F, b, [1, 1], is_valid=False)
    x = dy.rectify(x)
    # The pooling window spans the whole sentence length.
    x = dy.maxpooling2d(x, [1, sntlen], [1, 1], is_valid=True)
    if self.DROPOUT > 0:
        # dy.dropout returns a new expression; the result was previously
        # discarded, so dropout was never applied.
        # NOTE(review): there is no train/eval switch here - set
        # self.DROPOUT to 0 for evaluation.
        x = dy.dropout(x, self.DROPOUT)
    f = dy.reshape(x, (self.EMB_DIM * 1 * 100, ))
    return W1 * f + bW1
def conv(input_):
    """Convolve, rectify and max-pool the input.

    :param input_: dy.Expression; documented upstream as ((1, T, dsz), B)

    Returns: dy.Expression reshaped to ``(cmotsz,)``
    """
    feature_map = dy.rectify(
        dy.conv2d_bias(input_, weight, bias, strides, is_valid=False))
    # Keep only the maximum along dimension 1.
    pooled = dy.max_dim(feature_map, 1)
    return dy.reshape(pooled, (cmotsz, ))
def convnet(self, image):
    """Encode an image with five convolution stages and a linear projection.

    Each stage is a convolution, a 2x2 max-pool with stride 2 and a ReLU.
    Only the single branch F1, F2, F31, F41, F51 is active.

    Args:
        image: array accepted by ``dy.inputTensor``.

    Returns:
        DyNet expression ``W1 * x + bW1`` over the flattened feature map.
    """
    def stage(feat, kernel, bias):
        convolved = dy.conv2d_bias(feat, kernel, bias, [1, 1], is_valid=False)
        pooled = dy.maxpooling2d(convolved, [2, 2], [2, 2], is_valid=False)
        return dy.rectify(pooled)

    feat = stage(dy.inputTensor(image), self.F1, self.b1)
    feat = stage(feat, self.F2, self.b2)
    feat = stage(feat, self.F31, self.b31)
    feat = stage(feat, self.F41, self.b41)
    feat = stage(feat, self.F51, self.b51)

    flat = dy.reshape(feat, (self.RESHAPING, ))
    return self.W1 * flat + self.bW1
def encode(self, word, training=False):
    """Encode a word from its characters with a convolution and max-pooling.

    Args:
        word: sequence of character ids; only the first 45 are used.
        training: when true and ``self.dropout > 0``, dropout is applied to
            the character embeddings.

    Returns:
        DyNet expression of shape ``(self.filter_size,)``: the tanh of the
        convolution output maximised over dimension 1.
    """
    kernel = dy.parameter(self.W_cnn)
    bias = dy.parameter(self.b_cnn)

    char_vectors = [dy.lookup(self.char_embeds, char_id) for char_id in word[:45]]
    char_matrix = dy.concatenate(char_vectors, d=1)
    if self.dropout > 0 and training:
        char_matrix = dy.dropout(char_matrix, self.dropout)

    # maybe change this? diagram shows padding
    feature_map = dy.conv2d_bias(char_matrix, kernel, bias,
                                 stride=(1, 1), is_valid=False)
    strongest = dy.max_dim(feature_map, d=1)
    return dy.reshape(dy.tanh(strongest), (self.filter_size, ))
def conv(input_):
    """Convolve, rectify and pool the input over its sequence dimension.

    :param input_: dy.Expression; documented upstream as ((1, T, dsz), B)

    Returns: dy.Expression reshaped to ``(cmotsz,)``
    """
    feature_map = dy.rectify(
        dy.conv2d_bias(input_, weight, bias, strides, is_valid=False))

    # dy.max_dim(x, d=0) is currently slow
    # (see https://github.com/clab/dynet/issues/1011), so the maximum is
    # taken with a max-pooling window that spans the whole sequence.
    shape, _ = feature_map.dim()
    _, seq_len, _ = shape
    pooled = dy.maxpooling2d(feature_map, [1, seq_len, 1], strides)
    return dy.reshape(pooled, (cmotsz,))
def predict_emb(self, chars):
    """Predict an embedding for a word from its character ids.

    The ids are padded on both sides, embedded, convolved, rectified and
    k-max pooled; an MLP with a tanh hidden layer produces the output.

    Args:
        chars: list of character ids.

    Returns:
        DyNet expression ``O * tanh(H * pooled + Hb) + Ob``.
    """
    dy.renew_cg()

    conv_kernel = dy.parameter(self.conv)
    conv_bias = dy.parameter(self.conv_bias)
    hidden_w = dy.parameter(self.cnn_to_rep_params)
    hidden_b = dy.parameter(self.cnn_to_rep_bias)
    out_w = dy.parameter(self.mlp_out)
    out_b = dy.parameter(self.mlp_out_bias)

    # Pad half a window on each side. TODO also consider w_stride?
    pad_char = self.c2i[PADDING_CHAR]
    edge = [pad_char] * (self.window_width // 2)
    char_ids = edge + chars + edge
    if len(chars) < self.pooling_maxk:
        # Extra padding so the k-max pooling output has enough columns for
        # the affine layer.
        char_ids.extend([pad_char] * (self.pooling_maxk - len(chars)))

    char_columns = dy.concatenate_cols([self.char_lookup[cid] for cid in char_ids])
    conv_input = dy.reshape(dy.transpose(char_columns),
                            (1, len(char_ids), self.char_dim))

    # not using is_valid=False due to maxk-pooling-induced extra padding
    rectified = dy.rectify(
        dy.conv2d_bias(conv_input, conv_kernel, conv_bias, self.stride,
                       is_valid=True))

    # Keep the k largest values along dimension 1, then flatten.
    top_k = dy.kmax_pooling(rectified, self.pooling_maxk, d=1)
    flat = dy.reshape(top_k, (self.hidden_dim * self.pooling_maxk, ))

    return out_w * dy.tanh(hidden_w * flat + hidden_b) + out_b
def compose(self, embeds):
    """Compose a sequence of embeddings into one vector with a convolution.

    Args:
        embeds: either a list of DyNet expressions, or a single batched
            expression whose batch elements are the sequence items.

    Returns:
        The result of ``self.transform.transform`` on the max-pooled
        convolution output.
    """
    if isinstance(embeds, list):
        # Copy so the padding below never grows the caller's list.
        embeds = list(embeds)
    else:
        embeds = [dy.pick_batch_elem(embeds, i) for i in range(embeds.dim()[1])]

    # Pad short sequences with zero vectors up to the n-gram size.
    missing = self.ngram_size - len(embeds)
    if missing > 0:
        embeds.extend([dy.zeros(self.embed_dim)] * missing)

    # Join as columns, add a trailing dimension, then reverse the axis order.
    stacked = dy.transpose(
        dy.concatenate([dy.concatenate_cols(embeds)], d=2), [2, 1, 0])
    convolved = dy.conv2d_bias(stacked, self.filter, self.bias,
                               (self.embed_dim, 1))
    pooled = dy.max_dim(dy.pick(convolved, index=0), d=0)
    return self.transform.transform(pooled)
def _build_tagging_graph(self, words, train_mode=True):
    """Build the computational graph for one input.

    Model similar to http://aclweb.org/anthology/D/D14/D14-1181.pdf: several
    convolutions over the same input, each max-pooled, concatenated and
    projected by a linear output layer.

    Args:
        words: list of tokens; padded with ``"<*>"`` up to
            ``self._cnn_window_size`` on a copy.
        train_mode: accepted for interface compatibility; not read here.

    Returns:
        DyNet expression ``mlp_output * pools_concat``.
    """
    dy.renew_cg()

    # Turn parameters into expressions.
    mlp_output = dy.parameter(self.pO)
    cnn_params = [
        (dy.parameter(W_cnn), dy.parameter(b_cnn))
        for W_cnn, b_cnn in zip(self.W_cnns, self.b_cnns)
    ]

    # Pad a copy: "words += ..." would lengthen the caller's list in place.
    if len(words) < self._cnn_window_size:
        pad_char = "<*>"
        words = list(words) + [pad_char] * (self._cnn_window_size - len(words))

    if self._char_level:
        cnn_in = dy.concatenate(self._chars_rep(words), d=1)
    else:
        cnn_in = dy.concatenate([self._word_rep(word) for word in words], d=1)

    pools_out = []
    for W_cnn_express, b_cnn_express in cnn_params:
        cnn_out = dy.conv2d_bias(cnn_in, W_cnn_express, b_cnn_express,
                                 stride=(1, 1), is_valid=False)
        # Max-pooling over dimension 1.
        pool_out = dy.max_dim(cnn_out, d=1)
        pools_out.append(dy.reshape(pool_out, (self._cnn_filter_size, )))

    pools_concat = dy.concatenate(pools_out)
    return mlp_output * pools_concat
def calc_scores(words):
    """Compute class scores for one sentence with the CNN classifier.

    Args:
        words: list of word ids; padded with 0 up to ``WIN_SIZE`` on a copy.

    Returns:
        DyNet expression ``W_sm * pool_out + b_sm``.
    """
    dy.renew_cg()
    W_cnn_express = dy.parameter(W_cnn)
    b_cnn_express = dy.parameter(b_cnn)
    W_sm_express = dy.parameter(W_sm)
    b_sm_express = dy.parameter(b_sm)

    # The convolution needs at least WIN_SIZE positions, so short sentences
    # are padded with id 0. Pad a copy: "words += ..." would lengthen the
    # caller's list in place.
    if len(words) < WIN_SIZE:
        words = list(words) + [0] * (WIN_SIZE - len(words))

    # One embedding column per word.
    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in words], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn_express, b_cnn_express,
                             stride=(1, 1), is_valid=False)
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE, ))
    pool_out = dy.rectify(pool_out)
    return W_sm_express * pool_out + b_sm_express
def calc_scores(self, sentences, meta_data=None, get_probability=True):
    """Score a group of sentences with the CNN encoder and MLP head.

    Each sentence is embedded, convolved, max-pooled over dimension 1 and
    rectified; the per-sentence vectors are averaged. Optional meta data is
    appended to that average before the MLP.

    The computation graph is not renewed here; the caller is expected to
    have called ``dy.renew_cg()``.

    :param sentences: list of sentences, each a list of integer word ids.
        Sentences shorter than ``self.win_size`` are padded with 0 on a
        copy; the input lists are left untouched.
    :param meta_data: dict or None. When given, its values (ordered by
        sorted key) are concatenated to the averaged sentence features.
    :param get_probability: if True, return the MLP output; if False,
        return the feature vector that would be fed to the MLP.
    :return: DyNet expression. Either
        ``logistic(V_mlp * tanh(W_mlp * features + b_mlp) + a_mlp)`` or the
        feature vector itself.
    """
    # Encode every sentence independently, then average the results.
    pool_out_agg = []
    for words in sentences:
        if len(words) < self.win_size:
            # Pad a copy so the caller's sentence is not modified.
            words = list(words) + [0] * (self.win_size - len(words))
        cnn_in = dy.concatenate([dy.lookup(self.W_emb, x) for x in words], d=1)
        cnn_out = dy.conv2d_bias(cnn_in, self.W_cnn, self.b_cnn,
                                 stride=(1, 1), is_valid=False)
        pool_out = dy.max_dim(cnn_out, d=1)
        pool_out = dy.reshape(pool_out, (self.filter_size, ))
        pool_out_agg.append(dy.rectify(pool_out))  # ReLU: max(x_i, 0)
    features = dy.average(pool_out_agg)

    if meta_data is not None:
        # Sorting by key gives a stable feature order.
        meta_data_ordered = [value for _, value in sorted(meta_data.items())]
        features = dy.concatenate([features, dy.inputVector(meta_data_ordered)])

    if not get_probability:
        return features

    h = dy.tanh((self.W_mlp * features) + self.b_mlp)
    return dy.logistic((self.V_mlp * h) + self.a_mlp)
def transduce(
    self, expr_seq: expression_seqs.ExpressionSequence
) -> expression_seqs.ExpressionSequence:
    """
    Transduce the sequence, applying masks if given (masked timesteps simply copy previous h / c).

    A single strided convolution projects the input to three gate
    pre-activations per (possibly subsampled) time step; the recurrence over
    time is element-wise only.

    Args:
      expr_seq: expression sequence, or a list of expression sequences whose
                tensors are concatenated (accessed via ``as_tensor()``)
    Return:
      expression sequence of hidden states, one per reduced time step; the final
      h / c pair is also stored in ``self._final_states``
    """
    if isinstance(expr_seq, list):
        # Several inputs: mask, length and batch size come from the first one.
        mask_out = expr_seq[0].mask
        seq_len = len(expr_seq[0])
        batch_size = expr_seq[0].dim()[1]
        tensors = [e.as_tensor() for e in expr_seq]
        input_tensor = dy.reshape(dy.concatenate(tensors), (seq_len, 1, self.input_dim), batch_size=batch_size)
    else:
        mask_out = expr_seq.mask
        seq_len = len(expr_seq)
        batch_size = expr_seq.dim()[1]
        input_tensor = dy.reshape(dy.transpose(expr_seq.as_tensor()), (seq_len, 1, self.input_dim), batch_size=batch_size)

    if self.dropout > 0.0 and self.train:
        input_tensor = dy.dropout(input_tensor, self.dropout)

    # One convolution yields all three gate pre-activations; the stride
    # along dimension 0 may shorten the sequence.
    proj_inp = dy.conv2d_bias(input_tensor, dy.parameter(self.p_f), dy.parameter(self.p_b), stride=(self.stride, 1), is_valid=False)
    reduced_seq_len = proj_inp.dim()[0][0]
    proj_inp = dy.transpose(
        dy.reshape(proj_inp, (reduced_seq_len, self.hidden_dim * 3), batch_size=batch_size))
    # proj_inp dims: (hidden, 1, seq_len), batch_size
    if self.stride > 1 and mask_out is not None:
        # Bring the mask to the subsampled length.
        mask_out = mask_out.lin_subsampled(trg_len=reduced_seq_len)

    # Index 0 holds the all-zero initial state; it is dropped from the output.
    h = [dy.zeroes(dim=(self.hidden_dim, 1), batch_size=batch_size)]
    c = [dy.zeroes(dim=(self.hidden_dim, 1), batch_size=batch_size)]
    for t in range(reduced_seq_len):
        # Rows [0, H), [H, 2H) and [2H, 3H) of column t hold the f, o and z
        # pre-activations respectively (H = hidden_dim).
        f_t = dy.logistic(
            dy.strided_select(proj_inp, [], [0, t], [self.hidden_dim, t + 1]))
        o_t = dy.logistic(
            dy.strided_select(proj_inp, [], [self.hidden_dim, t], [self.hidden_dim * 2, t + 1]))
        z_t = dy.tanh(
            dy.strided_select(proj_inp, [], [self.hidden_dim * 2, t], [self.hidden_dim * 3, t + 1]))

        if self.dropout > 0.0 and self.train:
            # Where the sampled mask is 0, f_t becomes 1, so that unit
            # keeps its previous cell value.
            retention_rate = 1.0 - self.dropout
            dropout_mask = dy.random_bernoulli((self.hidden_dim, 1), retention_rate, batch_size=batch_size)
            f_t = 1.0 - dy.cmult(
                dropout_mask, 1.0 - f_t
            )  # TODO: would be easy to make a zoneout dynet operation to save memory

        # The input gate is tied to the forget gate.
        i_t = 1.0 - f_t

        if t == 0:
            c_t = dy.cmult(i_t, z_t)
        else:
            c_t = dy.cmult(f_t, c[-1]) + dy.cmult(i_t, z_t)
        h_t = dy.cmult(
            o_t, c_t)  # note: LSTM would use dy.tanh(c_t) instead of c_t
        # If no mask entry is set for this step, keep the new states;
        # otherwise blend new and previous states through the mask.
        if mask_out is None or np.isclose(
                np.sum(mask_out.np_arr[:, t:t + 1]), 0.0):
            c.append(c_t)
            h.append(h_t)
        else:
            c.append(
                mask_out.cmult_by_timestep_expr(c_t, t, True) + mask_out.cmult_by_timestep_expr(c[-1], t, False))
            h.append(
                mask_out.cmult_by_timestep_expr(h_t, t, True) + mask_out.cmult_by_timestep_expr(h[-1], t, False))

    self._final_states = [transducers.FinalTransducerState(dy.reshape(h[-1], (self.hidden_dim,), batch_size=batch_size), \
                                                           dy.reshape(c[-1], (self.hidden_dim,), batch_size=batch_size))]
    return expression_seqs.ExpressionSequence(expr_list=h[1:], mask=mask_out)