def do_one_batch(X_batch, Z_batch):
    # Flatten the batch into a 1-D vector as a workaround for batched input
    batch_size = X_batch.shape[0]
    if DO_BATCH:
        X_batch_f = X_batch.flatten('F')
        Z_batch_f = Z_batch.flatten('F')
        x = dy.reshape(dy.inputVector(X_batch_f), (nmf, nframes), batch_size=batch_size)
        z = dy.reshape(dy.inputVector(Z_batch_f), (nvgg,), batch_size=batch_size)
        scnn.add_input([X_batch[i] for i in range(X_batch.shape[0])])
        vgg.add_input([Z_batch[i] for i in range(X_batch.shape[0])])
    else:
        x = dy.matInput(X_batch.shape[0], X_batch.shape[1])
        x.set(X_batch.flatten('F'))
        z = dy.vecInput(Z_batch.shape[0])
        z.set(Z_batch.flatten('F'))
        x = dy.reshape(dy.transpose(x, [1, 0]), (1, X_batch.shape[1], X_batch.shape[0]))
        print(x.npvalue().shape)

    # Three convolutional blocks, each followed by ReLU and k-max pooling over time
    a_h1 = dy.conv2d_bias(x, w_i, b_i, [1, 1], is_valid=False)
    h1 = dy.rectify(a_h1)
    h1_pool = dy.kmax_pooling(h1, D[1], d=1)

    a_h2 = dy.conv2d_bias(h1_pool, w_h1, b_h1, [1, 1], is_valid=False)
    h2 = dy.rectify(a_h2)
    h2_pool = dy.kmax_pooling(h2, D[2], d=1)

    a_h3 = dy.conv2d_bias(h2_pool, w_h2, b_h2, [1, 1], is_valid=False)
    h3 = dy.rectify(a_h3)
    h3_pool = dy.kmax_pooling(h3, D[3], d=1)

    h4 = dy.kmax_pooling(h3_pool, 1, d=1)
    h4_re = dy.reshape(h4, (J[3],))
    #print(h4_re.npvalue().shape)
    g = dy.scalarInput(1.)
    zem_sp = dy.weight_norm(h4_re, g)
    #print(zem_sp.npvalue().shape)
    zem_vgg = w_embed * z + b_embed
    #print(zem_vgg.npvalue().shape)

    # Similarity between the speech and VGG embeddings
    sa = dy.transpose(zem_sp) * zem_vgg
    s = dy.rectify(sa)

    if PRINT_EMBED:
        print('Vgg embedding vector:', zem_vgg.npvalue().shape)
        print(zem_vgg.value())
        print('Speech embedding vector:', zem_sp.npvalue().shape)
        print(zem_sp.value())
    if PRINT_SIM:
        print('Raw Similarity:', sa.npvalue())
        print(sa.value())
        print('Similarity:', s.npvalue())
        print(s.value())
    return s
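The flatten-and-reshape minibatching trick above is easy to get wrong, so the following is a minimal, self-contained sketch of the same pattern on toy shapes (a batch of four 3x5 matrices; nothing below comes from do_one_batch itself, and dy.inputTensor(..., batched=True) is shown only as a possible alternative route):

import numpy as np
import dynet as dy

dy.renew_cg()
batch = np.random.rand(4, 3, 5)  # 4 toy examples, each a 3x5 matrix

# One column-major block per example, so each DyNet batch element lines up with one example.
flat = np.concatenate([batch[i].flatten('F') for i in range(4)])
x = dy.reshape(dy.inputVector(flat), (3, 5), batch_size=4)
print(x.dim())  # ((3, 5), 4): per-example shape plus the batch size

# More direct route where available: the last axis of the array is taken as the batch dimension.
x2 = dy.inputTensor(np.transpose(batch, (1, 2, 0)), batched=True)
print(x2.dim())  # ((3, 5), 4)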
def __call__(self, sequence):
    # Look up each token, falling back to the UNK embedding for out-of-vocabulary items
    next_input = [dy.lookup(self._E, self._W2I[i]) if i in self._W2I
                  else dy.lookup(self._E, self._W2I["UNK"])
                  for i in sequence]
    # Densely connected stack: each layer sees its input concatenated with its own output
    for layer in self._stacks[0:-1]:
        output = layer(next_input)
        next_input = [dy.concatenate([next_input[i], output[i]]) for i in range(len(sequence))]
    output = self._stacks[-1](next_input)
    # Max-over-time pooling: stack the outputs as columns and keep the largest value per row
    exp_output = dy.concatenate_cols(output)
    v = dy.kmax_pooling(exp_output, 1, d=1)
    return v
def predict_emb(self, chars):
    dy.renew_cg()

    conv_param = dy.parameter(self.conv)
    conv_param_bias = dy.parameter(self.conv_bias)
    H = dy.parameter(self.cnn_to_rep_params)
    Hb = dy.parameter(self.cnn_to_rep_bias)
    O = dy.parameter(self.mlp_out)
    Ob = dy.parameter(self.mlp_out_bias)

    # padding
    pad_char = self.c2i[PADDING_CHAR]
    padding_size = self.window_width // 2  # TODO also consider w_stride?
    char_ids = ([pad_char] * padding_size) + chars + ([pad_char] * padding_size)
    if len(chars) < self.pooling_maxk:
        # pad further so the k-max pooling output is large enough for the affine layer
        char_ids.extend([pad_char] * (self.pooling_maxk - len(chars)))

    embeddings = dy.concatenate_cols([self.char_lookup[cid] for cid in char_ids])
    reshaped_embeddings = dy.reshape(dy.transpose(embeddings), (1, len(char_ids), self.char_dim))

    # not using is_valid=False due to the extra padding added for k-max pooling
    conv_out = dy.conv2d_bias(reshaped_embeddings, conv_param, conv_param_bias,
                              self.stride, is_valid=True)
    relu_out = dy.rectify(conv_out)

    ### alternative pooling that only works when pooling_maxk is 1; not sure what other differences there may be
    # poolingk = [1, len(chars)]
    # pooling_out = dy.maxpooling2d(relu_out, poolingk, self.stride, is_valid=True)
    # pooling_out_flat = dy.reshape(pooling_out, (self.hidden_dim,))
    ### another possible way to pool is simply dy.max_dim(relu_out, d=1)

    pooling_out = dy.kmax_pooling(relu_out, self.pooling_maxk, d=1)  # d = dimension to take the k max over
    pooling_out_flat = dy.reshape(pooling_out, (self.hidden_dim * self.pooling_maxk,))

    return O * dy.tanh(H * pooling_out_flat + Hb) + Ob
def transduce(self, src):
    src = src.as_tensor()

    src_height = src.dim()[0][0]
    src_width = src.dim()[0][1]
    src_channels = 1
    batch_size = src.dim()[1]
    src = dy.reshape(src, (src_height, src_width, src_channels),
                     batch_size=batch_size)  # ((276, 80, 3), 1)
    # print(self.filters1)

    # convolution and pooling layers
    l1 = dy.rectify(dy.conv2d(src, dy.parameter(self.filters1),
                              stride=[self.stride[0], self.stride[0]], is_valid=True))
    pool1 = dy.maxpooling2d(l1, (1, 4), (1, 2), is_valid=True)

    l2 = dy.rectify(dy.conv2d(pool1, dy.parameter(self.filters2),
                              stride=[self.stride[1], self.stride[1]], is_valid=True))
    pool2 = dy.maxpooling2d(l2, (1, 4), (1, 2), is_valid=True)

    l3 = dy.rectify(dy.conv2d(pool2, dy.parameter(self.filters3),
                              stride=[self.stride[2], self.stride[2]], is_valid=True))
    pool3 = dy.kmax_pooling(l3, 1, d=1)
    # print(pool3.dim())

    # L2-normalize the pooled representation and drop the singleton dimensions
    output = dy.cdiv(pool3, dy.sqrt(dy.squared_norm(pool3)))
    output = dy.reshape(output, (self.num_filters[2],), batch_size=batch_size)
    # print("my dim: ", output.dim())

    return ExpressionSequence(expr_tensor=output)
def GetPOSIT(self, qvecs, sims, w2v_sims, matches):
    qscores = []
    for qtok in range(len(qvecs)):
        # Per query token: max similarity and mean of the top-5 similarities,
        # over the basic sims, the w2v sims, and the exact matches
        svec = dy.concatenate(sims[qtok])
        sim = dy.kmax_pooling(dy.transpose(svec), 1)[0]
        sim5 = dy.mean_elems(dy.kmax_pooling(dy.transpose(svec), 5)[0])

        wvec = dy.concatenate(w2v_sims[qtok])
        wsim = dy.kmax_pooling(dy.transpose(wvec), 1)[0]
        wsim5 = dy.mean_elems(dy.kmax_pooling(dy.transpose(wvec), 5)[0])

        mvec = dy.concatenate(matches[qtok])
        msim = dy.kmax_pooling(dy.transpose(mvec), 1)[0]
        msim5 = dy.mean_elems(dy.kmax_pooling(dy.transpose(mvec), 5)[0])

        layer1 = (self.W_term1.expr() * dy.concatenate([sim, sim5, wsim, wsim5, msim, msim5])
                  + self.b_term1.expr())
        qscores.append(self.W_term.expr() * utils.leaky_relu(layer1))
    return qscores
e = dy.concatenate([e1, e2, ...])            # concatenate

e = dy.affine_transform([e0, e1, e2, ...])   # e = e0 + e1*e2 + e3*e4 + ...

## Loss functions
e = dy.squared_distance(e1, e2)
e = dy.l1_distance(e1, e2)
e = dy.huber_distance(e1, e2, c=1.345)

# e1 must be a scalar that is a value between 0 and 1
# e2 (ty) must be a scalar that is a value between 0 and 1
# e = -(ty * log(e1) + (1 - ty) * log(1 - e1))
e = dy.binary_log_loss(e1, e2)

# e1 is a row vector or scalar
# e2 is a row vector or scalar
# m is a number
# e = max(0, m - (e1 - e2))
e = dy.pairwise_rank_loss(e1, e2, m=1.0)

## Convolutions
# e1 \in R^{d x s} (input)
# e2 \in R^{d x m} (filter)
e = dy.conv1d_narrow(e1, e2)     # e = e1 *conv e2
e = dy.conv1d_wide(e1, e2)       # e = e1 *conv e2
e = dy.filter1d_narrow(e1, e2)   # e = e1 *filter e2
e = dy.kmax_pooling(e1, k)       # k-max pooling operation (Kalchbrenner et al., 2014)
e = dy.kmh_ngram(e1, k)          #
e = dy.fold_rows(e1, nrows=2)    #
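The kmax_pooling entry above is terse, so here is a toy, self-contained illustration (values chosen only for demonstration) of what the operation returns when pooling along dimension 1:

import numpy as np
import dynet as dy

dy.renew_cg()
m = dy.inputTensor(np.array([[1., 5., 3., 2.],
                             [9., 0., 4., 7.]]))  # a 2x4 matrix
top2 = dy.kmax_pooling(m, 2, d=1)  # keep the 2 largest values in each row,
                                   # in their original left-to-right order
print(top2.npvalue())  # rows: [5. 3.] and [9. 7.]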
def build_graph(pre_words, hy_words, holder):
    # Fresh initial states for the three stacked BiLSTM layers
    fl1_init = holder.fwdRNN_layer1.initial_state()
    bl1_init = holder.bwdRNN_layer1.initial_state()
    fl2_init = holder.fwdRNN_layer2.initial_state()
    bl2_init = holder.bwdRNN_layer2.initial_state()
    fl3_init = holder.fwdRNN_layer3.initial_state()
    bl3_init = holder.bwdRNN_layer3.initial_state()

    pre_wembs = [get_word_rep(w, holder) for w in pre_words]
    hy_wembs = [get_word_rep(w, holder) for w in hy_words]

    # Encode the first (pre) sequence: three BiLSTM layers with skip connections, then max pooling
    pre_fws = fl1_init.transduce(pre_wembs)
    pre_bws = bl1_init.transduce(reversed(pre_wembs))
    pre_bi = [dy.concatenate([word, f, b])
              for word, f, b in zip(pre_wembs, pre_fws, reversed(pre_bws))]
    pre_fws2 = fl2_init.transduce(pre_bi)
    pre_bws2 = bl2_init.transduce(reversed(pre_bi))
    pre_b_tag = [dy.concatenate([word, f1, b1, f2, b2])
                 for word, f1, b1, f2, b2 in zip(pre_wembs, pre_fws, reversed(pre_bws),
                                                 pre_fws2, reversed(pre_bws2))]
    pre_fws3 = fl3_init.transduce(pre_b_tag)
    pre_bws3 = bl3_init.transduce(reversed(pre_b_tag))
    pre_b_tagtag = [dy.concatenate([f3, b3])
                    for f3, b3 in zip(pre_fws3, reversed(pre_bws3))]
    pre_v_elemets_size = len(pre_b_tagtag[0].npvalue())
    pre_row_num = len(pre_b_tagtag)
    pre_vecs_concat = dy.concatenate([v for v in pre_b_tagtag])
    pre_mat = dy.reshape(pre_vecs_concat, (pre_v_elemets_size, pre_row_num))
    pre_final = dy.concatenate([dy.kmax_pooling(v, 1, 0) for v in pre_mat])

    # Encode the second (hy) sequence with the same layers
    hy_fws = fl1_init.transduce(hy_wembs)
    hy_bws = bl1_init.transduce(reversed(hy_wembs))
    hy_bi = [dy.concatenate([word, f, b])
             for word, f, b in zip(hy_wembs, hy_fws, reversed(hy_bws))]
    hy_fws2 = fl2_init.transduce(hy_bi)
    hy_bws2 = bl2_init.transduce(reversed(hy_bi))
    hy_b_tag = [dy.concatenate([word, f1, b1, f2, b2])
                for word, f1, b1, f2, b2 in zip(hy_wembs, hy_fws, reversed(hy_bws),
                                                hy_fws2, reversed(hy_bws2))]
    hy_fws3 = fl3_init.transduce(hy_b_tag)
    hy_bws3 = bl3_init.transduce(reversed(hy_b_tag))
    hy_b_tagtag = [dy.concatenate([f3, b3])
                   for f3, b3 in zip(hy_fws3, reversed(hy_bws3))]
    hy_v_elemets_size = len(hy_b_tagtag[0].npvalue())
    hy_row_num = len(hy_b_tagtag)
    hy_vecs_concat = dy.concatenate([v for v in hy_b_tagtag])
    hy_mat = dy.reshape(hy_vecs_concat, (hy_v_elemets_size, hy_row_num))
    hy_final = dy.concatenate([dy.kmax_pooling(v, 1, 0) for v in hy_mat])

    # Combine the two sequence vectors and score with a two-layer MLP
    final = dy.concatenate([pre_final, hy_final,
                            dy.abs(pre_final - hy_final),
                            dy.cmult(pre_final, hy_final)])
    W1 = dy.parameter(holder.W1)
    b1 = dy.parameter(holder.b1)
    W2 = dy.parameter(holder.W2)
    b2 = dy.parameter(holder.b2)
    mid = dy.rectify(W1 * final + b1)
    return W2 * mid + b2