def get_score(self, word, context):
    """Return the negative-sampling loss for one (word, context) pair.

    The objective is the sigmoid of the dot product between the context
    embedding and the word embedding, multiplied by the sigmoid of the
    negated dot product for every sampled negative context. The loss is
    the negative log of that product.

    Args:
        word: index into ``self.word_embeddings``.
        context: index into ``self.context_embeddings`` (the positive pair).

    Returns:
        A DyNet expression holding ``-log(objective)``.
    """

    def dot_with_word(context_index):
        # Both embeddings are looked up on every call so each term gets
        # its own lookup nodes in the computation graph.
        context_row = dy.transpose(self.context_embeddings[context_index])
        return context_row * self.word_embeddings[word]

    # Positive pair: sigma(c . w)
    objective = dy.logistic(dot_with_word(context))

    # Draw ``num_sampled`` distinct context indices according to
    # ``self.context_fre``.
    # NOTE(review): the draw is not filtered, so the positive context may
    # also appear among the negatives - confirm this is intended.
    sampled_contexts = np.random.choice(
        self.context_size,
        self.num_sampled,
        replace=False,
        p=self.context_fre,
    )

    # Negative pairs: sigma(-(c' . w)) for each sampled context c'
    for sampled in sampled_contexts:
        objective *= dy.logistic(-dot_with_word(sampled))

    return -dy.log(objective)
def __call__(self, input_exp, hidden_exp, mask=None):
    """Run one step of the gated recurrent cell and return the new state.

    Args:
        input_exp: DyNet expression for the current input.
        hidden_exp: dict whose ``"H"`` entry is the previous hidden state.
        mask: optional per-instance values; where the mask is 0 the
            previous hidden state is passed through unchanged.

    Returns:
        ``{"H": new_hidden}``.
    """
    # Dropout kind 1: plain dropout on the input.
    if self.idrop > 0.:
        input_exp = dy.dropout(input_exp, self.idrop)

    prev_h = hidden_exp["H"]

    # Dropout kind 2: fixed masks from ``self.masks`` - separate ones for
    # the gate path (indices 0, 1) and the candidate path (indices 2, 3).
    gate_x = cand_x = input_exp
    gate_h = cand_h = prev_h
    if self.gdrop > 0.:
        gate_x = dy.cmult(input_exp, self.masks[0])
        gate_h = dy.cmult(prev_h, self.masks[1])
        cand_x = dy.cmult(input_exp, self.masks[2])
        cand_h = dy.cmult(prev_h, self.masks[3])

    weights = self.iparams

    # Reset and update gates are computed jointly, then split by range.
    gates = dy.affine_transform([
        weights["brz"],
        weights["x2rz"], gate_x,
        weights["h2rz"], gate_h,
    ])
    gates = dy.logistic(gates)
    reset_gate = dy.pick_range(gates, 0, self.n_hidden)
    # NOTE(review): this slice goes through ``BK`` while the one above uses
    # ``dy``; ``BK`` is defined outside this block - confirm they agree.
    update_gate = BK.pick_range(gates, self.n_hidden, 2 * self.n_hidden)

    # Candidate state from the input and the reset-scaled previous state.
    reset_h = dy.cmult(reset_gate, cand_h)
    candidate = dy.affine_transform([
        weights["bh"],
        weights["x2h"], cand_x,
        weights["h2h"], reset_h,
    ])
    candidate = dy.tanh(candidate)

    # Interpolate; the first term uses the original (un-masked) hidden state.
    new_h = dy.cmult(update_gate, prev_h) + dy.cmult((1. - update_gate), candidate)

    if mask is None:
        return {"H": new_h}

    # mask: if 0 then pass the previous state through
    mask_row = np.asarray(mask).reshape((1, -1))
    keep_new = dy.inputTensor(mask_row, True)        # 1.0 for real words
    keep_old = dy.inputTensor(1.0 - mask_row, True)  # 1.0 for padding words (mask=0)
    new_h = new_h * keep_new + prev_h * keep_old
    return {"H": new_h}
def get_pointergen_probs(self, c_t, state, x_t, a_t, probs, src1):
    """Mix generation probabilities with copy probabilities.

    When ``self.pointer_gen`` is falsy, ``probs`` is returned untouched
    together with a generation probability of ``1.0``.

    Otherwise a gate ``p_gen`` is computed from ``c_t``, the concatenated
    decoder state and ``x_t``. ``probs`` is scaled by ``p_gen`` and ``a_t``
    by ``1 - p_gen``. Each copy probability is added to the target-vocab
    entry (or entries) that its source symbol maps to; symbols that map to
    the target UNK index are skipped.

    Returns:
        ``(mixed_probs, p_gen_value)`` where ``p_gen_value`` is a Python
        float (or ``1.0`` in the non-pointer case).
    """
    if not self.pointer_gen:
        return probs, 1.0

    unk_idx = self.tgt_vocab.str2int(UNK)

    gate_input = (
        self.ptr_w_c * c_t
        + self.ptr_w_s * dy.concatenate(list(state.s()))
        + self.ptr_w_x * x_t
    )
    p_gen = dy.logistic(gate_input)

    scaled_gen = probs * p_gen
    scaled_copy = a_t * (1 - p_gen)

    # One bucket per target entry, seeded with its generation probability.
    buckets = [[gen_prob] for gen_prob in scaled_gen]

    for src_symbol, copy_prob in zip(src1, scaled_copy):
        target = self.tgt_vocab.str2int(self.src1_vocab.int2str(src_symbol))
        if target == unk_idx:
            continue
        if isinstance(target, int):
            buckets[target].append(copy_prob)
        else:
            # A non-int result is treated as a collection of indices; the
            # copy mass is split evenly among them.
            for target_idx in target:
                buckets[target_idx].append(copy_prob / len(target))

    mixed = dy.concatenate([dy.esum(bucket) for bucket in buckets])
    return mixed, p_gen.scalar_value()
def compute_output_layer(self, input):
    """Feed ``input`` through every layer and return all activations.

    Each entry of ``self.parameters`` is indexed as ``(weights, bias)``.
    Every layer applies ``self.activation`` to ``W * x + b`` except the
    last one, which applies the logistic function instead.

    Returns:
        A list whose first element is ``input`` followed by the output of
        each layer in order; just ``[input]`` when there are no layers.
    """
    activations = [input]
    final_index = len(self.parameters) - 1

    for index, layer in enumerate(self.parameters):
        weights = dy.parameter(layer[0])
        bias = dy.parameter(layer[1])
        pre_activation = weights * activations[-1] + bias
        # The output layer is always squashed with a sigmoid.
        squash = dy.logistic if index == final_index else self.activation
        activations.append(squash(pre_activation))

    return activations
def __call__(self, x_embs):
    """Encode a sequence with a BiGRU followed by a selective gate.

    Args:
        x_embs: sequence of input embedding expressions.

    Returns:
        ``(gated_states, first_backward_state)``: the gated bidirectional
        states, and column 0 of the backward GRU outputs.
    """
    seq_len = len(x_embs)

    # BiGRU: one column per time step; backward outputs are flipped back
    # into the original order.
    forward_states = self.fGRUBuilder.initial_state().transduce(x_embs)
    forward_cols = dy.concatenate_cols(forward_states)
    backward_states = self.bGRUBuilder.initial_state().transduce(x_embs[::-1])
    backward_cols = dy.concatenate_cols(backward_states[::-1])
    encoded = dy.concatenate([forward_cols, backward_cols])

    # Sequence summary: backward state at the first position joined with
    # the forward state at the last position.
    first_backward = dy.pick(backward_cols, index=0, dim=1)
    last_forward = dy.pick(forward_cols, index=seq_len - 1, dim=1)
    summary = dy.concatenate([first_backward, last_forward])

    # Selective gate: the summary term is added to every column.
    per_step_term = self.Ws * encoded
    summary_term = self.Us * summary + self.bs
    gate = dy.logistic(dy.colwise_add(per_step_term, summary_term))

    return dy.cmult(encoded, gate), first_backward
def get_features_for_tagging(self, sentence, training):
    """Build emission, transition and reconstruction features for a sentence.

    Args:
        sentence: iterable of ``(chars, word, feats, tag)`` tuples.
        training: forwarded to ``self.cnn.encode``.

    Returns:
        ``(features, t_features, feat_reconstruct)``, each a list with one
        entry per token: emission scores, transition scores reshaped to
        ``(num_tags, num_tags)``, and the reconstructed feature vector
        (all zeros when ``self.lstm_feats`` is falsy).
    """
    # Project the raw soft-gazetteer features of every token.
    gazetteer_feats = []
    for _chars, _word, raw_feats, _tag in sentence:
        column = dy.inputTensor(raw_feats.reshape(self.featsize, 1))
        gazetteer_feats.append(
            dy.affine_transform([self.feat_b, self.feat_w, column])
        )

    # Zero stand-ins used wherever the gazetteer features are switched off.
    # presumably FEAT_OUT_SIZE matches the projection's output size - verify
    blank_feats = [
        dy.inputTensor(np.zeros(shape=(FEAT_OUT_SIZE, 1)))
        for _token in sentence
    ]

    # Non-linear transform for soft gazetteer features
    nonlinearity = None
    if self.feat_func == "tanh":
        nonlinearity = dy.tanh
    elif self.feat_func == "relu":
        nonlinearity = dy.rectify
    if nonlinearity is not None:
        gazetteer_feats = [nonlinearity(feat) for feat in gazetteer_feats]

    # Soft gazetteer features at the LSTM level
    lstm_level_feats = gazetteer_feats if self.lstm_feats else blank_feats
    token_inputs = []
    for extra, (chars, word, _feats, _tag) in zip(lstm_level_feats, sentence):
        pieces = [self.cnn.encode(chars, training), self.word_embeds[word], extra]
        token_inputs.append(dy.concatenate(pieces))
    contexts = self.word_lstm.transduce(token_inputs)

    # Soft gazetteer features at the CRF level
    crf_level_feats = gazetteer_feats if self.crf_feats else blank_feats

    features = []
    for context, extra in zip(contexts, crf_level_feats):
        joined = dy.concatenate([context, extra])
        features.append(
            dy.affine_transform(
                [self.context_to_emit_b, self.context_to_emit_w, joined]
            )
        )

    tag_square = (self.num_tags, self.num_tags)
    t_features = []
    for context, extra in zip(contexts, crf_level_feats):
        joined = dy.concatenate([context, extra])
        flat_scores = dy.affine_transform(
            [self.context_to_trans_b, self.context_to_trans_w, joined]
        )
        t_features.append(dy.reshape(flat_scores, tag_square))

    # Autoencoder feature reconstruction
    if self.lstm_feats:
        feat_reconstruct = []
        for context in contexts:
            decoded = dy.affine_transform(
                [self.feat_reconstruct_b, self.feat_reconstruct_w, context]
            )
            feat_reconstruct.append(dy.logistic(decoded))
    else:
        feat_reconstruct = [
            dy.inputTensor(np.zeros(shape=(self.featsize,)))
            for _context in contexts
        ]

    return features, t_features, feat_reconstruct