def apply(self, sent1, sent2):
    eL = dy.parameter(self.linear)
    sent1 = dy.inputTensor(self.embedding.all_embeds_from_ix(sent1)) * eL
    sent2 = dy.inputTensor(self.embedding.all_embeds_from_ix(sent2)) * eL
    out1, out2 = self.feed_F(sent1, sent2)
    e_out = out1 * dy.transpose(out2)
    prob_f_1 = dy.softmax(e_out)
    score = dy.transpose(e_out)
    prob_f_2 = dy.softmax(score)
    sent1_allign = dy.concatenate_cols([sent1, prob_f_1 * sent2])
    sent2_allign = dy.concatenate_cols([sent2, prob_f_2 * sent1])
    out_g_1, out_g_2 = self.feed_G(sent1_allign, sent2_allign)
    sent1_out_g = dy.sum_dim(out_g_1, [0])
    sent2_out_g = dy.sum_dim(out_g_2, [0])
    concat = dy.transpose(dy.concatenate([sent1_out_g, sent2_out_g]))
    h_step_1 = dy.parameter(self.h_step_1)
    sent_h = dy.rectify(dy.dropout(concat, 0.2) * h_step_1)
    h_step_2 = dy.parameter(self.h_step_2)
    sent_h = dy.rectify(dy.dropout(sent_h, 0.2) * h_step_2)
    final = dy.parameter(self.linear2)
    final = dy.transpose(sent_h * final)
    return final
def __call__(self, sent1, sent2):
    """
    :param sent1: np matrix.
    :param sent2: np matrix.
    :return: np array of 3 elements.
    """
    sent1_linear, sent2_linear = self.apply_linear_embed(sent1, sent2)
    f1, f2 = self.apply_f(sent1_linear, sent2_linear)
    score1 = f1 * dy.transpose(f2)
    prob1 = dy.softmax(score1)
    score2 = dy.transpose(score1)
    prob2 = dy.softmax(score2)
    sent1_combine = dy.concatenate_cols([sent1_linear, prob1 * sent2_linear])
    sent2_combine = dy.concatenate_cols([sent2_linear, prob2 * sent1_linear])

    # sum
    g1, g2 = self.apply_g(sent1_combine, sent2_combine)
    sent1_output = dy.sum_dim(g1, [0])
    sent2_output = dy.sum_dim(g2, [0])

    input_combine = dy.transpose(dy.concatenate([sent1_output, sent2_output]))
    h = self.apply_h(input_combine)
    linear_final = dy.parameter(self.linear_final)
    h = h * linear_final
    output = dy.log_softmax(dy.transpose(h))
    return output
def forward(self, s1, s2, label=None):
    eL = dy.parameter(self.embeddingLinear)
    s1 = dy.inputTensor(s1) * eL
    s2 = dy.inputTensor(s2) * eL

    # F step
    Lf1 = dy.parameter(self.mlpF1)
    Fs1 = dy.rectify(dy.dropout(s1, 0.2) * Lf1)
    Fs2 = dy.rectify(dy.dropout(s2, 0.2) * Lf1)
    Lf2 = dy.parameter(self.mlpF2)
    Fs1 = dy.rectify(dy.dropout(Fs1, 0.2) * Lf2)
    Fs2 = dy.rectify(dy.dropout(Fs2, 0.2) * Lf2)

    # Attention scoring
    score1 = Fs1 * dy.transpose(Fs2)
    prob1 = dy.softmax(score1)
    score2 = dy.transpose(score1)
    prob2 = dy.softmax(score2)

    # Align pairs using attention
    s1Pairs = dy.concatenate_cols([s1, prob1 * s2])
    s2Pairs = dy.concatenate_cols([s2, prob2 * s1])

    # G step
    Lg1 = dy.parameter(self.mlpG1)
    Gs1 = dy.rectify(dy.dropout(s1Pairs, 0.2) * Lg1)
    Gs2 = dy.rectify(dy.dropout(s2Pairs, 0.2) * Lg1)
    Lg2 = dy.parameter(self.mlpG2)
    Gs1 = dy.rectify(dy.dropout(Gs1, 0.2) * Lg2)
    Gs2 = dy.rectify(dy.dropout(Gs2, 0.2) * Lg2)

    # Sum
    Ss1 = dy.sum_dim(Gs1, [0])
    Ss2 = dy.sum_dim(Gs2, [0])
    concatS12 = dy.transpose(dy.concatenate([Ss1, Ss2]))

    # H step
    Lh1 = dy.parameter(self.mlpH1)
    Hs = dy.rectify(dy.dropout(concatS12, 0.2) * Lh1)
    Lh2 = dy.parameter(self.mlpH2)
    Hs = dy.rectify(dy.dropout(Hs, 0.2) * Lh2)

    # Final layer
    final_layer = dy.parameter(self.final_layer)
    final = dy.transpose(Hs * final_layer)

    # Label can be 0, so compare against None explicitly
    if label is not None:
        return dy.pickneglogsoftmax(final, label)
    else:
        out = dy.softmax(final)
        return np.argmax(out.npvalue())
def aggregate_v1_v2(v1, v2, model_params):
    H_w1 = model_params['H_w1']
    H_b1 = model_params['H_b1']
    H_w2 = model_params['H_w2']
    H_b2 = model_params['H_b2']
    v1_sum = dy.sum_dim(v1, [1])
    v2_sum = dy.sum_dim(v2, [1])
    con = dy.concatenate([v1_sum, v2_sum])
    # con = dy.dropout(con, DROPOUT_RATE)
    y_hat = dy.softmax(H_w2 * (dy.rectify((H_w1 * con) + H_b1)) + H_b2)
    return y_hat
def get_scores_logsoftmax(self, mlp_dec_state):
    score = super().get_scores(mlp_dec_state)
    lex_prob = self.lexicon_prob * self.attender.get_last_attention()
    # Note that the sum is only over a dimension of size 1 (dim 1).
    # This is to make sure that the shape of the returned tensor matches the vanilla decoder.
    return dy.sum_dim(self.lexicon_method(mlp_dec_state, score, lex_prob), [1])
def calc_probs(self, x: dy.Expression) -> dy.Expression:
    model_score = dy.softmax(self.calc_scores(x))
    if self.lexicon_type == 'linear':
        coeff = self.calculate_coeff(x)
        return dy.sum_dim(dy.cmult(coeff, model_score)
                          + dy.cmult((1 - coeff), self.calculate_dict_prob(x)), [1])
    else:
        return model_score
def cross_entropy_structbag(self, P, Q):
    """
    P (K x m) represents a distribution over STRUCTURED labels where each
    label is a BAG of K INDEPENDENT symbols taking values in {1 ... m}.
    That is, z = (z1 ... zK) is assigned probability P1(z1) * ... * PK(zK).
    (Similarly for Q.)

    By the independence, H(P, Q) = sum_k H(Pk, Qk).
    """
    return -dy.sum_dim(dy.cmult(P, self.log2(Q)), [0, 1])
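# Written out, the docstring's identity (assuming self.log2 is an elementwise base-2 log):
#   H(P, Q) = sum_k H(Pk, Qk) = -sum_k sum_z Pk(z) * log2(Qk(z)),
# i.e. the negated sum over both dimensions of the elementwise product P (*) log2(Q),
# which is what the single dy.sum_dim(..., [0, 1]) call in the return statement computes.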
def __call__(self, query, options, gold, lengths, query_no):
    if len(options) == 1:
        return None, 0

    final = []
    if args.word_vectors:
        qvecs = [dy.lookup(self.pEmbedding, w) for w in query]
        qvec_max = dy.emax(qvecs)
        qvec_mean = dy.average(qvecs)
    for otext, features in options:
        if not args.no_features:
            inputs = dy.inputTensor(features)
        if args.word_vectors:
            ovecs = [dy.lookup(self.pEmbedding, w) for w in otext]
            ovec_max = dy.emax(ovecs)
            ovec_mean = dy.average(ovecs)
            if args.no_features:
                inputs = dy.concatenate([qvec_max, qvec_mean, ovec_max, ovec_mean])
            else:
                inputs = dy.concatenate([inputs, qvec_max, qvec_mean, ovec_max, ovec_mean])
        if args.drop > 0:
            inputs = dy.dropout(inputs, args.drop)
        h = inputs
        for pH, pB in zip(self.hidden, self.bias):
            h = dy.affine_transform([pB, pH, h])
            if args.nonlin == "linear":
                pass
            elif args.nonlin == "tanh":
                h = dy.tanh(h)
            elif args.nonlin == "cube":
                h = dy.cube(h)
            elif args.nonlin == "logistic":
                h = dy.logistic(h)
            elif args.nonlin == "relu":
                h = dy.rectify(h)
            elif args.nonlin == "elu":
                h = dy.elu(h)
            elif args.nonlin == "selu":
                h = dy.selu(h)
            elif args.nonlin == "softsign":
                h = dy.softsign(h)
            elif args.nonlin == "swish":
                h = dy.cmult(h, dy.logistic(h))
        final.append(dy.sum_dim(h, [0]))

    final = dy.concatenate(final)
    nll = -dy.log_softmax(final)
    dense_gold = []
    for i in range(len(options)):
        dense_gold.append(1.0 / len(gold) if i in gold else 0.0)
    answer = dy.inputTensor(dense_gold)
    loss = dy.transpose(answer) * nll
    predicted_link = np.argmax(final.npvalue())
    return loss, predicted_link
def log_sum_exp(tag_score_arr):
    argmax = np.argmax(tag_score_arr.value())
    max_score = tag_score_arr[argmax]
    score = max_score
    max_arr = dynet.concatenate([max_score for i in range(len(self.pos) + 2)])
    score += dynet.log(dynet.sum_dim(dynet.exp(tag_score_arr - max_arr), [0]))
    return score
def generate(self, src, forced_trg_ids):
    assert not forced_trg_ids
    assert batchers.is_batched(src) and src.batch_size() == 1, "batched generation not fully implemented"
    src = src[0]
    # Generating outputs
    outputs = []
    event_trigger.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src)
    encodings = self.encoder.transduce(embeddings)
    if self.mode in ["avg_mlp", "final_mlp"]:
        if self.generate_per_step:
            assert self.mode == "avg_mlp", "final_mlp not supported with generate_per_step=True"
            scores = [dy.logistic(self.output_layer.transform(enc_i)) for enc_i in encodings]
        else:
            if self.mode == "avg_mlp":
                encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) * (1.0 / encodings.dim()[0][1])
            elif self.mode == "final_mlp":
                encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
            scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
    elif self.mode == "lin_sum_sig":
        enc_lin = []
        for step_i, enc_i in enumerate(encodings):
            step_linear = self.output_layer.transform(enc_i)
            if encodings.mask and np.sum(encodings.mask.np_arr[:, step_i]) > 0:
                step_linear = dy.cmult(step_linear,
                                       dy.inputTensor(1.0 - encodings.mask.np_arr[:, step_i], batched=True))
            enc_lin.append(step_linear)
        if self.generate_per_step:
            scores = [dy.logistic(enc_i) for enc_i in enc_lin]
        else:
            if encodings.mask:
                encoding_fixed_size = dy.cdiv(dy.esum(enc_lin),
                                              dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
            else:
                encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
            scores = dy.logistic(encoding_fixed_size)
    else:
        raise ValueError(f"unknown mode '{self.mode}'")
    if self.generate_per_step:
        output_actions = [np.argmax(score_i.npvalue()) for score_i in scores]
        score = np.sum([np.max(score_i.npvalue()) for score_i in scores])
        outputs.append(sent.SimpleSentence(words=output_actions,
                                           idx=src.idx,
                                           vocab=getattr(self.trg_reader, "vocab", None),
                                           score=score,
                                           output_procs=self.trg_reader.output_procs))
    else:
        scores_arr = scores.npvalue()
        output_actions = list(np.nonzero(scores_arr > 0.5)[0])
        score = np.sum(scores_arr[scores_arr > 0.5])
        outputs.append(sent.SimpleSentence(words=output_actions,
                                           idx=src.idx,
                                           vocab=getattr(self.trg_reader, "vocab", None),
                                           score=score,
                                           output_procs=self.trg_reader.output_procs))
    return outputs
def log_sum_exp(scores):
    npval = scores.npvalue()
    argmax_score = np.argmax(npval)
    max_score_expr = dy.pick(scores, argmax_score)
    max_score_expr_broadcast = dy.concatenate([max_score_expr] * self.tagset_size)
    return max_score_expr + dy.log(
        dy.sum_dim(dy.transpose(dy.exp(scores - max_score_expr_broadcast)), [1]))
def log_sum_exp(scores):
    npval = scores.npvalue()
    argmax_score = np.argmax(npval)
    max_score_expr = dynet.pick(scores, argmax_score)
    max_score_expr_broadcast = dynet.concatenate([max_score_expr] * (self.n_tags + 2))
    return max_score_expr + dynet.log(
        dynet.sum_dim(
            dynet.transpose(dynet.exp(scores - max_score_expr_broadcast)), [1]))
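# The log_sum_exp helpers above (and the other variants in this collection) all use the
# standard max-shift trick: log sum_i exp(s_i) = max(s) + log sum_i exp(s_i - max(s)).
# Subtracting the max keeps every argument of exp() non-positive, so nothing overflows,
# while the result is mathematically unchanged. A minimal, self-contained sketch
# (illustrative only, not taken from any of the snippets here):
import dynet as dy
import numpy as np

dy.renew_cg()
scores = dy.inputTensor(np.array([1000.0, 1000.0, 999.0]))
max_val = dy.max_dim(scores)                               # scalar expression
shifted = scores - dy.concatenate([max_val] * 3)           # broadcast the max
lse = max_val + dy.log(dy.sum_dim(dy.exp(shifted), [0]))   # stable log-sum-exp
print(lse.value())                                         # ~1000.0 + log(2 + e**-1)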
def aggregate(sentence_a, sentence_b):
    w1 = dy.parameter(decide_w1)
    b1 = dy.parameter(decide_b1)
    w2 = dy.parameter(decide_w2)
    b2 = dy.parameter(decide_b2)
    sentence_a = dy.sum_dim(sentence_a, [1])
    logging.debug("Sentence a reduction shape: " + str(sentence_a.dim()))
    sentence_b = dy.sum_dim(sentence_b, [1])
    logging.debug("Sentence b reduction shape: " + str(sentence_b.dim()))
    combined = dy.concatenate([sentence_a, sentence_b])
    logging.debug("Combined representations shape: " + str(combined.dim()))
    x = (w1 * combined) + b1
    x = dy.rectify(x)
    logits = (w2 * x) + b2
    return logits
def calc_nll(self, src, trg):
    event_trigger.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src)
    encodings = self.encoder.transduce(embeddings)
    if not batchers.is_batched(trg):
        trg = batchers.mark_as_batch([trg])
    if self.mode in ["avg_mlp", "final_mlp"]:
        if self.mode == "avg_mlp":
            if encodings.mask:
                encoding_fixed_size = dy.cdiv(dy.sum_dim(encodings.as_tensor(), [1]),
                                              dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
            else:
                encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) / encodings.dim()[0][1]
        elif self.mode == "final_mlp":
            encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
        scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
    elif self.mode == "lin_sum_sig":
        enc_lin = []
        for step_i, enc_i in enumerate(encodings):
            step_linear = self.output_layer.transform(enc_i)
            if encodings.mask and np.sum(encodings.mask.np_arr[:, step_i]) > 0:
                step_linear = dy.cmult(step_linear,
                                       dy.inputTensor(1.0 - encodings.mask.np_arr[:, step_i], batched=True))
            enc_lin.append(step_linear)
        if encodings.mask:
            encoding_fixed_size = dy.cdiv(dy.esum(enc_lin),
                                          dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
        else:
            encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
        scores = dy.logistic(encoding_fixed_size)
    else:
        raise ValueError(f"unknown mode '{self.mode}'")
    idxs = ([], [])
    for batch_i in range(trg.batch_size()):
        for word in set(trg[batch_i]):
            if word not in {vocabs.Vocab.ES, vocabs.Vocab.SS}:
                idxs[0].append(word)
                idxs[1].append(batch_i)
    trg_scores = dy.sparse_inputTensor(idxs,
                                       values=np.ones(len(idxs[0])),
                                       shape=scores.dim()[0] + (scores.dim()[1],),
                                       batched=True)
    loss_expr = dy.binary_log_loss(scores, trg_scores)
    return loss_expr
def sum(x, dim=None, include_batch_dim=False):
    if isinstance(x, list):
        return dy.esum(x)
    head_shape, batch_size = x.dim()
    if dim is None:
        x = dy.sum_elems(x)
        if include_batch_dim and batch_size > 1:
            return dy.sum_batches(x)
        else:
            return x
    else:
        if dim == -1:
            dim = len(head_shape) - 1
        return dy.sum_dim(x, d=[dim], b=include_batch_dim)
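# A hypothetical usage sketch of the wrapper above (variable names are illustrative;
# the wrapper is assumed to be in scope as `sum`, shadowing the builtin):
import dynet as dy
import numpy as np

dy.renew_cg()
m = dy.inputTensor(np.ones((2, 3)))   # shape ((2, 3), 1)
total = sum(m)                        # dy.sum_elems: a scalar expression, value 6.0
last_dim = sum(m, dim=-1)             # dy.sum_dim over dim 1: a length-2 vector of 3.0s
listed = sum([m, m])                  # dy.esum: elementwise sum of the two expressions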
def _featurize_sentence(self, sentence, is_train, elmo_embeddings):
    # assert len(sentence) == elmo_embeddings.dim()[1], (elmo_embeddings.dim(), len(sentence))
    if is_train:
        self.lstm.set_dropout(self.dropout)
    else:
        self.lstm.disable_dropout()

    embeddings = []
    cur_word_index = 0
    for tag, word in [(START, START)] + sentence + [(STOP, STOP)]:
        if word not in (START, STOP):
            count = self.word_vocab.count(word)
            if self.use_elmo:
                unk_word = (np.random.rand() < 1 / (1 + count)) or (np.random.rand() < 0.1)
            else:
                unk_word = np.random.rand() < 1 / (1 + count)
            if not count or (is_train and unk_word):
                word = UNK
                # if random.random() < 0.5:
                #     word = UNK
                # else:
                #     word = random.choice(self.word_vocab.values)
        word_embedding = self.word_embeddings[self.word_vocab.index(word)]
        input_components = [word_embedding]
        if self.use_elmo:
            if tag == START or tag == STOP:
                elmo_embedding = dy.zeros(1024)
            else:
                elmo_weights = dy.parameter(self.elmo_weights)
                elmo_embedding = dy.sum_dim(
                    dy.cmult(elmo_weights,
                             dy.pick(elmo_embeddings, index=cur_word_index, dim=1)),
                    [0])
                cur_word_index += 1
            input_components.append(elmo_embedding)
        # else:
        #     input_components[-1] = dy.rectify(self.projection(input_components[-1]))
        raw_input = dy.concatenate(input_components)
        if is_train:
            input = dy.dropout(raw_input, p=0.4)
        else:
            input = raw_input
        embeddings.append(input)
    return self.lstm.transduce(embeddings)
def on_calc_additional_loss(self, *args, **kwargs):
    seq_len = len(self.last_output)
    loss_expr = 0
    for pos_i in range(seq_len):
        input_i = self.last_output[pos_i]
        affine = self.linear_layer(input_i)
        softmax_out = dy.softmax(affine)
        if self.mode == "entropy":
            loss_expr = loss_expr - dy.sum_dim(dy.cmult(dy.log(softmax_out), softmax_out), d=[0])
        elif self.mode == "max":
            loss_expr = loss_expr - dy.log(dy.max_dim(softmax_out))
        else:
            raise ValueError(f"unknown mode {self.mode}")
    # loss_expr = loss_expr * (self.scale / seq_len)
    loss_expr = loss_expr * self.scale
    return losses.FactoredLossExpr({"enc_entropy": loss_expr})
def _featurize_sentence(self, sentence, is_train, elmo_embeddings, cur_word_index):
    if is_train:
        self.lstm.set_dropout(self.dropout)
    else:
        self.lstm.disable_dropout()

    embeddings = []
    for tag, word in [(START, START)] + sentence + [(STOP, STOP)]:
        tag_embedding = self.tag_embeddings[self.tag_vocab.index(tag)]
        if word not in (START, STOP):
            count = self.word_vocab.count(word)
            if not count or (is_train and
                             (np.random.rand() < 1 / (1 + count) or np.random.rand() < 0.1)):
                word = UNK
        word_embedding = self.word_embeddings[self.word_vocab.index(word)]
        if tag == START or tag == STOP:
            concatenated_embeddings = [tag_embedding, word_embedding, dy.zeros(1024)]
        else:
            elmo_weights = dy.parameter(self.elmo_weights)
            embedding = dy.sum_dim(dy.cmult(elmo_weights, elmo_embeddings[cur_word_index]), [0])
            concatenated_embeddings = [tag_embedding, word_embedding, embedding]
            cur_word_index += 1
        embeddings.append(dy.concatenate(concatenated_embeddings))
    return self.lstm.transduce(embeddings)
def _featurize_sentence(self, sentence, is_train, elmo_embeddings):
    if is_train:
        self.lstm.set_dropout(self.dropout)
    else:
        self.lstm.disable_dropout()

    embeddings = []
    cur_word_index = 0
    for tag, word in [(START, START)] + sentence + [(STOP, STOP)]:
        if word not in (START, STOP):
            count = self.word_vocab.count(word)
            unk_word = (np.random.rand() < 1 / (1 + count)) or (np.random.rand() < 0.1)
            if not count or (is_train and unk_word):
                word = UNK
        word_embedding = self.word_embeddings[self.word_vocab.index(word)]
        input_components = [word_embedding]
        if self.use_elmo:
            if tag == START or tag == STOP:
                elmo_embedding = dy.zeros(1024)
            else:
                elmo_weights = dy.parameter(self.elmo_weights)
                elmo_embedding = dy.sum_dim(
                    dy.cmult(elmo_weights,
                             dy.pick(elmo_embeddings, index=cur_word_index, dim=1)),
                    [0])
                cur_word_index += 1
            input_components.append(elmo_embedding)
        embedding = dy.concatenate(input_components)
        if is_train:
            embedding = dy.dropout(embedding, p=0.4)
        embeddings.append(embedding)
    return self.lstm.transduce(embeddings)
def transduce(self, embeds):
    return dy.sum_dim(embeds, [1])
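# Read as a composition function, the one-liner above collapses a (dim x seq_len) matrix
# of embeddings into a single vector by summing over the time dimension (a bag-of-words
# style pooling). A minimal sketch with assumed shapes, for illustration only:
import dynet as dy
import numpy as np

dy.renew_cg()
embeds = dy.inputTensor(np.random.rand(4, 7))   # 4-dim embeddings for 7 time steps
pooled = dy.sum_dim(embeds, [1])                # one pooled vector per sequence
assert pooled.dim() == ((4,), 1)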
def get_bert_embed(self, passage, lang, train=False):
    orig_tokens = passage
    bert_tokens = []
    # Token map will be an int -> int mapping between the `orig_tokens` index and
    # the `bert_tokens` index.
    orig_to_tok_map = []
    # Example:
    #   orig_tokens     = ["John", "Johanson", "'s", "house"]
    #   bert_tokens     = ["[CLS]", "john", "johan", "##son", "'", "s", "house", "[SEP]"]
    #   orig_to_tok_map = [(1), (2,3), (4,5), (6)]
    bert_tokens.append("[CLS]")
    for orig_token in orig_tokens:
        start_token = len(bert_tokens)
        bert_token = self.tokenizer.tokenize(orig_token)
        bert_tokens.extend(bert_token)
        end_token = start_token + len(bert_token)
        orig_to_tok_map.append(slice(start_token, end_token))
    bert_tokens.append("[SEP]")

    indexed_tokens = self.tokenizer.convert_tokens_to_ids(bert_tokens)
    tokens_tensor = self.torch.tensor([indexed_tokens])
    if self.config.args.bert_gpu:
        tokens_tensor = tokens_tensor.to('cuda')
    with self.torch.no_grad():
        encoded_layers, _ = self.bert_model(tokens_tensor)
    assert len(encoded_layers) == self.bert_layers_count, \
        "Invalid BERT layer count %s" % len(encoded_layers)

    aligned_layer = []
    for layer in range(self.bert_layers_count):
        aligned_layer.append([])
        for mapping_range in orig_to_tok_map:
            token_embeddings = encoded_layers[layer][0][mapping_range]
            if self.config.args.bert_token_align_by == "mean":
                aligned_layer[layer].append(
                    self.torch.mean(token_embeddings, dim=(0,)).cpu().data.numpy())
            elif self.config.args.bert_token_align_by == "sum":
                aligned_layer[layer].append(
                    self.torch.sum(token_embeddings, dim=(0,)).cpu().data.numpy())
            elif self.config.args.bert_token_align_by == "first":
                aligned_layer[layer].append(token_embeddings[0].cpu().data.numpy())
            else:
                raise ValueError("Invalid BERT token align option '%s'"
                                 % self.config.args.bert_token_align_by)

    layer_list_to_use = self.config.args.bert_layers
    aligned_layer = [aligned_layer[i] for i in layer_list_to_use]

    if self.config.args.bert_layers_pooling == "weighted":
        bert_softmax = dy.softmax(self.params["bert_weights"])
        embeds = dy.cmult(dy.inputTensor(np.asarray(aligned_layer)), bert_softmax)
        embeds = dy.sum_dim(embeds, [0])
    elif self.config.args.bert_layers_pooling == "concat":
        embeds = dy.inputTensor(np.concatenate(aligned_layer, axis=1))
    elif self.config.args.bert_layers_pooling == "sum":
        embeds = dy.inputTensor(np.sum(aligned_layer, axis=0))
    else:
        raise ValueError("Invalid BERT pooling option '%s'"
                         % self.config.args.bert_layers_pooling)

    if self.config.args.bert_multilingual == 0:
        assert lang
        if (lang + "_embed") in self.params:
            lang_embed = self.params[lang + "_embed"]
        else:
            lang_embed = self.model.add_parameters(50, init='glorot')
            self.params[lang + "_embed"] = lang_embed
        multilingual_embeds = []
        for embed in embeds:
            multilingual_embeds.append(dy.concatenate([lang_embed, embed]))
        embeds = dy.transpose(dy.concatenate_cols(multilingual_embeds))

    if self.config.args.bert_layers_pooling == "weighted":
        single_token_embed_len = self.bert_embedding_len
    elif self.config.args.bert_layers_pooling == "concat":
        single_token_embed_len = self.bert_embedding_len * len(layer_list_to_use)
    elif self.config.args.bert_layers_pooling == "sum":
        single_token_embed_len = self.bert_embedding_len
    else:
        raise ValueError("Invalid BERT pooling option '%s'"
                         % self.config.args.bert_layers_pooling)
    if self.config.args.bert_multilingual == 0:
        single_token_embed_len += 50

    # TODO: try dropout strategies like dropping at the per layer embeddings or dropping entire layers.
    assert embeds.dim() == ((len(passage), single_token_embed_len), 1), \
        "Invalid BERT dim %s" % embeds.dim()
    assert 0 <= self.config.args.bert_dropout < 1, \
        "Invalid BERT dropout %s" % self.config.args.bert_dropout
    if train:
        embeds = dy.dropout(embeds, self.config.args.bert_dropout)
    return embeds
def main():
    dy.renew_cg()
    try:
        train_file = open("%s" % (sys.argv[1]))
        test_file = open("%s" % (sys.argv[2]))
    except:
        print("python classification_dynet.py <train_file> <test_file>")
        sys.exit(1)

    train_text_set, train_content_label_set, train_type_label_set, unique_content, unique_type = extract_from_json(train_file)
    test_text_set, test_content_label_set, test_type_label_set, _, _ = extract_from_json(test_file)
    word_dict = {}
    word_dict = extract_dictionary(train_text_set, word_dict)
    word_dict = extract_dictionary(test_text_set, word_dict)
    train_feature_matrix = generate_feature_matrix(train_text_set, word_dict)
    test_feature_matrix = generate_feature_matrix(test_text_set, word_dict)
    features_total = len(train_feature_matrix[0])

    para_collec = dy.ParameterCollection()
    pW1 = para_collec.add_parameters((150, 200), dy.NormalInitializer())
    pBias1 = para_collec.add_parameters((150), dy.ConstInitializer(0))
    pW2_content = para_collec.add_parameters((100, 150), dy.NormalInitializer())
    pBias2_content = para_collec.add_parameters((100), dy.ConstInitializer(0))
    pW3_content = para_collec.add_parameters((len(unique_content), 100), dy.NormalInitializer())
    pBias3_content = para_collec.add_parameters((len(unique_content)), dy.ConstInitializer(0))
    pW2_type = para_collec.add_parameters((50, 150), dy.NormalInitializer())
    pBias2_type = para_collec.add_parameters((50), dy.ConstInitializer(0))
    pW3_type = para_collec.add_parameters((len(unique_type), 50), dy.NormalInitializer())
    pBias3_type = para_collec.add_parameters((len(unique_type)), dy.ConstInitializer(0))
    lookup = para_collec.add_lookup_parameters((features_total, 200), dy.NormalInitializer())
    trainer = dy.SimpleSGDTrainer(para_collec)

    for i in range(0, 1):
        # resample minority and majority classes
        majority, majority_content_label, majority_type_label, minority, minority_content_label, minority_type_label = label_separator(
            "type", train_feature_matrix, train_content_label_set, train_type_label_set)
        minority_u_text, minority_u_content_label, minority_u_type_label = resample(
            minority, minority_content_label, minority_type_label,
            replace=True, n_samples=int(len(majority) * 3), random_state=123)
        X_train = train_feature_matrix
        y_train_content = train_content_label_set
        y_train_type = train_type_label_set

        for index in range(0, 500):
            w1 = dy.parameter(pW1)
            bias1 = dy.parameter(pBias1)
            w2_content = dy.parameter(pW2_content)
            bias2_content = dy.parameter(pBias2_content)
            w3_content = dy.parameter(pW3_content)
            bias3_content = dy.parameter(pBias3_content)
            w2_type = dy.parameter(pW2_type)
            bias2_type = dy.parameter(pBias2_type)
            w3_type = dy.parameter(pW3_type)
            bias3_type = dy.parameter(pBias3_type)

            input_text = []
            input_array = X_train[index]
            for i in range(0, X_train[index].size):
                if X_train[index][i] > 0:
                    input_text.append(lookup[X_train[index][i]])
            x = dy.concatenate(input_text, 1)
            e_in = dy.sum_dim(x, [1]) / features_total
            e_affin1 = dy.affine_transform([bias1, w1, e_in])
            e_affin1 = dy.rectify(e_affin1)
            e_content_affin2 = dy.affine_transform([bias2_content, w2_content, e_affin1])
            e_content_affin2 = dy.dropout(e_content_affin2, 0.5)
            e_content_affin2 = dy.rectify(e_content_affin2)
            e_content_affin3 = dy.affine_transform([bias3_content, w3_content, e_content_affin2])
            e_content_affin3 = dy.dropout(e_content_affin3, 0.5)
            e_content_affin3 = dy.rectify(e_content_affin3)
            e_type_affin2 = dy.affine_transform([bias2_type, w2_type, e_affin1])
            e_type_affin2 = dy.dropout(e_type_affin2, 0.5)
            e_type_affin2 = dy.rectify(e_type_affin2)
            e_type_affin3 = dy.affine_transform([bias3_type, w3_type, e_type_affin2])
            e_type_affin3 = dy.dropout(e_type_affin3, 0.5)
            e_type_affin3 = dy.rectify(e_type_affin3)

            content_output = dy.pickneglogsoftmax(e_content_affin3, y_train_content[index])
            content_loss = content_output.scalar_value()
            type_output = dy.pickneglogsoftmax(e_type_affin3, y_train_type[index])
            type_loss = type_output.scalar_value()
            if index % 100 == 0:
                print(index, ": content_loss: ", content_loss, "type_loss", type_loss)
            content_output.backward()
            trainer.update()
            type_output.backward()
            trainer.update()
            dy.cg_checkpoint()

    print("testing...")
    pred_content = []
    pred_type = []
    w1 = dy.parameter(pW1)
    bias1 = dy.parameter(pBias1)
    w2_content = dy.parameter(pW2_content)
    bias2_content = dy.parameter(pBias2_content)
    w3_content = dy.parameter(pW3_content)
    bias3_content = dy.parameter(pBias3_content)
    w2_type = dy.parameter(pW2_type)
    bias2_type = dy.parameter(pBias2_type)
    w3_type = dy.parameter(pW3_type)
    bias3_type = dy.parameter(pBias3_type)

    for index in range(0, len(test_feature_matrix)):
        input_text = []
        line = train_text_set[index]
        for word in line:
            # check if RT
            if word == "RT":
                input_text.append(lookup[len(word_dict)])
            # check if hashtag
            if word[0] == "#":
                input_text.append(lookup[len(word_dict) + 1])
            # check if mention
            if word[0] == "@":
                input_text.append(lookup[len(word_dict) + 2])
            # just the word itself
            if word in word_dict:
                input_text.append(lookup[word_dict[word]])
            try:
                # lower-cased form of the word
                lower_word = str(word).lower()
                input_text.append(lookup[word_dict[lower_word]])
                # punctuation stripped
                replace_punctuation = str(word).maketrans(string.punctuation, '')
                clean_word = str(word).translate(replace_punctuation)
                input_text.append(lookup[word_dict[clean_word]])
            except:
                continue
        # stack the looked-up vectors as columns, as in the training loop
        x = dy.concatenate(input_text, 1)
        e_in = dy.sum_dim(x, [1]) / features_total
        e_affin1 = dy.affine_transform([bias1, w1, e_in])
        e_affin1 = dy.rectify(e_affin1)
        e_content_affin2 = dy.affine_transform([bias2_content, w2_content, e_affin1])
        e_content_affin2 = dy.rectify(e_content_affin2)
        e_content_affin3 = dy.affine_transform([bias3_content, w3_content, e_content_affin2])
        e_content_affin3 = dy.rectify(e_content_affin3)
        e_type_affin2 = dy.affine_transform([bias2_type, w2_type, e_affin1])
        e_type_affin2 = dy.rectify(e_type_affin2)
        e_type_affin3 = dy.affine_transform([bias3_type, w3_type, e_type_affin2])
        e_type_affin3 = dy.rectify(e_type_affin3)
        content_output = np.argmax(e_content_affin3.npvalue())
        pred_content.append(content_output)
        type_output = np.argmax(e_type_affin3.npvalue())
        pred_type.append(type_output)

    misclassification_content = 0
    misclassification_type = 0
    for index in range(0, len(pred_content)):
        if pred_content[index] != test_content_label_set[index]:
            misclassification_content += 1
        if pred_type[index] != test_type_label_set[index]:
            misclassification_type += 1
    print("content acc: ", (1 - float(misclassification_content / len(pred_content))))
    print("type acc: ", (1 - float(misclassification_type / len(pred_type))))
def __call__(self, src_encodings, trg_encodings):
    src_avg = dy.sum_dim(src_encodings.as_tensor(), [1]) / (src_encodings.as_tensor().dim()[0][1])
    trg_avg = dy.sum_dim(trg_encodings.as_tensor(), [1]) / (trg_encodings.as_tensor().dim()[0][1])
    return self.dist_op(src_avg - trg_avg)
def mi_zero(self, joint):
    prior1 = dy.sum_dim(joint, [1])
    prior2 = dy.sum_dim(joint, [0])
    return self.mi_zero_with_priors(joint, prior1, prior2)
def log_sum_exp(scores, num_labels):
    max_score_expr = dy.max_dim(scores)
    max_score_expr_broadcast = dy.concatenate([max_score_expr] * num_labels)
    return max_score_expr + dy.log(
        dy.sum_dim(dy.exp(scores - max_score_expr_broadcast), [0]))
def forward(self, sent1, sent2, label=None):
    """
    :param sent1: inputTensor
    :param sent2: inputTensor
    :param label: integer, range [0, 2]
    :return: loss
    """
    # Fix embedding
    eL = dy.parameter(self.embeddingLinear)
    sent1 = dy.inputTensor(sent1) * eL
    sent2 = dy.inputTensor(sent2) * eL

    # F step
    Lf1 = dy.parameter(self.mlpF1)
    Fsent1 = dy.rectify(dy.dropout(sent1, 0.2) * Lf1)
    Fsent2 = dy.rectify(dy.dropout(sent2, 0.2) * Lf1)
    Lf2 = dy.parameter(self.mlpF2)
    Fsent1 = dy.rectify(dy.dropout(Fsent1, 0.2) * Lf2)
    Fsent2 = dy.rectify(dy.dropout(Fsent2, 0.2) * Lf2)

    # Attention scoring
    score1 = Fsent1 * dy.transpose(Fsent2)
    prob1 = dy.softmax(score1)
    score2 = dy.transpose(score1)
    prob2 = dy.softmax(score2)

    # Align pairs using attention
    sent1Pairs = dy.concatenate_cols([sent1, prob1 * sent2])
    sent2Pairs = dy.concatenate_cols([sent2, prob2 * sent1])

    # G step
    Lg1 = dy.parameter(self.mlpG1)
    Gsent1 = dy.rectify(dy.dropout(sent1Pairs, 0.2) * Lg1)
    Gsent2 = dy.rectify(dy.dropout(sent2Pairs, 0.2) * Lg1)
    Lg2 = dy.parameter(self.mlpG2)
    Gsent1 = dy.rectify(dy.dropout(Gsent1, 0.2) * Lg2)
    Gsent2 = dy.rectify(dy.dropout(Gsent2, 0.2) * Lg2)

    # Sum
    Ssent1 = dy.sum_dim(Gsent1, [0])
    Ssent2 = dy.sum_dim(Gsent2, [0])
    concat = dy.transpose(dy.concatenate([Ssent1, Ssent2]))

    # H step
    Lh1 = dy.parameter(self.mlpH1)
    Hsent = dy.rectify(dy.dropout(concat, 0.2) * Lh1)
    Lh2 = dy.parameter(self.mlpH2)
    Hsent = dy.rectify(dy.dropout(Hsent, 0.2) * Lh2)

    # Final layer
    finalLayer = dy.parameter(self.finaLinear)
    # final = dy.softmax(dy.transpose(Hsent * finalLayer))
    final = dy.transpose(Hsent * finalLayer)

    # Label can be 0, so compare against None explicitly
    if label is not None:
        return dy.pickneglogsoftmax(final, label)
    else:
        out = dy.softmax(final)
        chosen = np.argmax(out.npvalue())
        return chosen
def calc_scores(self, x: dy.Expression) -> dy.Expression:
    model_score = self.output_projector.transform(x)
    if self.lexicon_type == 'bias':
        model_score += dy.sum_dim(dy.log(self.calculate_dict_prob(x) + self.lexicon_alpha), [1])
    return model_score