def _arg_closest_related_questions(self, question, related_questions):
    """Return the index (into related_questions) of the related question
    whose embedding lies closest to `question` in squared Euclidean distance.
    """
    batch = [question] + related_questions
    # Split each example into its (char_ids, word_ids) halves; the double
    # transpose normalizes the nested sequences into tuples.
    char_ids, word_ids = zip(*[zip(*zip(*x)) for x in batch])
    padded_word_ids, seq_lens = pad_sequences(word_ids, pad_tok=0)
    padded_char_ids, word_lens = pad_sequences(char_ids, pad_tok=0, nlevels=2)

    model = self.model
    feed = {
        model.q_word_ids: padded_word_ids,
        model.q_char_ids: padded_char_ids,
        model.q_sequence_lengths: seq_lens,
        model.q_word_lengths: word_lens,
        model.keep_op: 1.0,
        model.is_training: False,
    }
    embeddings = model.sess.run(model.q_dense, feed_dict=feed)

    # Row 0 is the query question; the rest are the candidates.
    anchor = embeddings[0]  # 1, 300
    candidates = embeddings[1:]
    distances = np.sum(np.square(candidates - anchor), axis=-1)
    return distances.argsort()[0]
def _next_batch(self, data, num_batch):
    """Yield `num_batch` mini-batches of word/POS id sequences interleaved
    element-wise with their dependency-relation ids.

    Yields: (word_relation_ids, pos_relation_ids, labels, relation_ids,
             sequence_lengths, sequence_lengths_re)
    """
    def interleave(even, odd):
        # Produce [even0, odd0, even1, odd1, ...] column-wise.
        merged = np.zeros((even.shape[0], even.shape[1] + odd.shape[1]))
        merged[:, ::2] = even
        merged[:, 1::2] = odd
        return merged

    for b in range(num_batch):
        lo = b * self.batch_size
        hi = lo + self.batch_size

        labels = data['labels'][lo:hi]
        relation_ids, sequence_lengths_re = pad_sequences(
            data['relations'][lo:hi], pad_tok=0,
            max_sent_length=self.max_length)

        # Word - relation - word: relation ids are offset past the word
        # vocabulary so both live in one embedding lookup space.
        word_ids, _ = pad_sequences(
            data['words'][lo:hi], pad_tok=0, max_sent_length=self.max_length)
        word_relation_ids = interleave(
            word_ids, self.embeddings.shape[0] + relation_ids)

        # Pos - relation - pos: relation ids offset past the POS vocabulary.
        pos_ids, sequence_lengths = pad_sequences(
            data['poses'][lo:hi], pad_tok=0, max_sent_length=self.max_length)
        pos_relation_ids = interleave(
            pos_ids, self.num_of_pos + 1 + relation_ids)

        yield (word_relation_ids, pos_relation_ids, labels, relation_ids,
               sequence_lengths, sequence_lengths_re)
def _next_batch(self, data, num_batch):
    """Yield `num_batch` padded (word_ids, labels, sequence_lengths) batches."""
    for b in range(num_batch):
        lo = b * self.batch_size
        hi = lo + self.batch_size
        word_ids, sequence_lengths = pad_sequences(data['words'][lo:hi],
                                                   pad_tok=0)
        labels, _ = pad_sequences(data['labels'][lo:hi], pad_tok=0)
        yield word_ids, labels, sequence_lengths
def get_feed_dict(self, seqs, labels=None, lr=None, dropout=None):
    """Build the session feed dict for one padded batch.

    Args:
        seqs: batch of word-id sequences (padded here with pad_mark=0).
        labels: optional label sequences, padded the same way.
        lr: optional learning rate to feed.
        dropout: optional keep/dropout value to feed.

    Returns:
        (feed_dict, seq_len_list) — the dict plus the true sequence lengths.
    """
    padded_seqs, seq_len_list = pad_sequences(seqs, pad_mark=0)
    feed = {
        self.word_ids: padded_seqs,
        self.sequence_lengths: seq_len_list,
    }
    if labels is not None:
        padded_labels, _ = pad_sequences(labels, pad_mark=0)
        feed[self.labels] = padded_labels
    if lr is not None:
        feed[self.lr_pl] = lr
    if dropout is not None:
        feed[self.dropout_pl] = dropout
    return feed, seq_len_list
def get_scores(self, question, contexts):
    """Score every context against `question`, one mini-batch at a time.

    The question is tiled to the batch size so the model sees one
    (question, context) pair per row.

    Returns:
        1-D numpy array with one model score per context.
    """
    def _prepare(examples):
        # Split each example into (char_ids, word_ids) and pad both levels.
        char_ids, word_ids = zip(*[zip(*zip(*x)) for x in examples])
        padded_words, seq_lens = pad_sequences(word_ids, pad_tok=0)
        padded_chars, word_lens = pad_sequences(char_ids, pad_tok=0, nlevels=2)
        return padded_words, padded_chars, seq_lens, word_lens

    scores = []
    num_batch = len(contexts) // self.batch_size + 1
    for b in range(num_batch):
        batch = contexts[b * self.batch_size:(b + 1) * self.batch_size]
        if not batch:
            # Guards the spurious final batch when len(contexts) divides evenly.
            break
        q_words, q_chars, q_seq_lens, q_word_lens = _prepare(
            [question] * len(batch))
        c_words, c_chars, c_seq_lens, c_word_lens = _prepare(batch)

        feed_dict = {
            self.cp_char_ids: c_chars,
            self.cp_word_ids: c_words,
            self.cp_sequence_lengths: c_seq_lens,
            self.cp_word_lengths: c_word_lens,
            self.q_word_ids: q_words,
            self.q_char_ids: q_chars,
            self.q_sequence_lengths: q_seq_lens,
            self.q_word_lengths: q_word_lens,
            self.keep_op: 1.0,
            self.is_training: False,
        }
        scores.extend(self.sess.run(self.p_score, feed_dict=feed_dict))
    return np.array(scores)
def _next_batch(self, data, num_batch):
    """Yield `num_batch` padded (question, positive context, negative context)
    triplet batches.

    Each yielded value is a flat 12-tuple:
    (q_word_ids, q_char_ids, q_seq_lens, q_word_lens,
     cp_word_ids, cp_char_ids, cp_seq_lens, cp_word_lens,
     cn_word_ids, cn_char_ids, cn_seq_lens, cn_word_lens)
    """
    def _prepare(examples):
        # Split each example into (char_ids, word_ids) and pad both levels.
        char_ids, word_ids = zip(*[zip(*zip(*x)) for x in examples])
        padded_words, seq_lens = pad_sequences(word_ids, pad_tok=0)
        padded_chars, word_lens = pad_sequences(char_ids, pad_tok=0, nlevels=2)
        return padded_words, padded_chars, seq_lens, word_lens

    for b in range(num_batch):
        batch = data[b * self.batch_size:(b + 1) * self.batch_size]
        if not batch:
            # Guards the spurious final batch when the data divides evenly.
            break
        q_batch, cp_batch, cn_batch = zip(*batch)
        yield _prepare(q_batch) + _prepare(cp_batch) + _prepare(cn_batch)
def _get_word_ids(self, X):
    """Extract the word-id sequence (element 1) of every example in X and
    pad them to a fixed length of `self.max_input_word` tokens."""
    sequences = [example[1] for example in X]
    padded, _ = pad_sequences(sequences, pad_tok=0,
                              fix_length=self.max_input_word)
    return padded
def _next_batch(self, data, num_batch):
    """Yield `num_batch` mini-batches in which each feature sequence (words,
    siblings, POS tags, synsets, positions) is interleaved element-wise with
    the dependency-relation ids along the path.

    Relation ids are shifted past each feature's own vocabulary size so the
    interleaved sequence indexes a single joint embedding table per feature.

    Yields:
        (positions_1_relation_ids, positions_2_relation_ids,
         word_relation_ids, sibling_relation_ids, pos_relation_ids,
         synset_relation_ids, relation_ids, labels)
    """
    start = 0
    idx = 0
    while idx < num_batch:
        # Get BATCH_SIZE samples each batch
        word_ids = data['words'][start:start + self.batch_size]
        sibling_ids = data['siblings'][start:start + self.batch_size]
        positions_1 = data['positions_1'][start:start + self.batch_size]
        positions_2 = data['positions_2'][start:start + self.batch_size]
        pos_ids = data['poses'][start:start + self.batch_size]
        synset_ids = data['synsets'][start:start + self.batch_size]
        relation_ids = data['relations'][start:start + self.batch_size]
        directions = data['directions'][start:start + self.batch_size]
        labels = data['labels'][start:start + self.batch_size]
        # Padding sentences to the length of longest one
        word_ids, _ = pad_sequences(word_ids, pad_tok=0,
                                    max_sent_length=self.max_length)
        sibling_ids, _ = pad_sequences(sibling_ids, pad_tok=0,
                                       max_sent_length=self.max_length)
        positions_1, _ = pad_sequences(positions_1, pad_tok=0,
                                       max_sent_length=self.max_length)
        positions_2, _ = pad_sequences(positions_2, pad_tok=0,
                                       max_sent_length=self.max_length)
        pos_ids, _ = pad_sequences(pos_ids, pad_tok=0,
                                   max_sent_length=self.max_length)
        synset_ids, _ = pad_sequences(synset_ids, pad_tok=0,
                                      max_sent_length=self.max_length)
        relation_ids, _ = pad_sequences(relation_ids, pad_tok=0,
                                        max_sent_length=self.max_length)
        directions, _ = pad_sequences(directions, pad_tok=0,
                                      max_sent_length=self.max_length)
        # Create index matrix with words and dependency relations between
        # words: relation+direction ids are offset past the word vocabulary.
        new_relation_ids = self.embeddings.shape[
            0] + relation_ids + directions
        word_relation_ids = np.zeros(
            (word_ids.shape[0],
             word_ids.shape[1] + new_relation_ids.shape[1]))
        # Even columns hold feature ids, odd columns the relation ids;
        # w_ids / rel_idxs are reused by every section below.
        w_ids, rel_idxs = [], []
        for j in range(word_ids.shape[1] + new_relation_ids.shape[1]):
            if j % 2 == 0:
                w_ids.append(j)
            else:
                rel_idxs.append(j)
        word_relation_ids[:, w_ids] = word_ids
        word_relation_ids[:, rel_idxs] = new_relation_ids
        # Create index matrix with siblings and dependency relations
        # between siblings
        new_relation_ids = self.num_of_siblings + 1 + relation_ids + directions
        sibling_relation_ids = np.zeros(
            (sibling_ids.shape[0],
             sibling_ids.shape[1] + new_relation_ids.shape[1]))
        sibling_relation_ids[:, w_ids] = sibling_ids
        sibling_relation_ids[:, rel_idxs] = new_relation_ids
        # Create index matrix with pos tags and dependency relations
        # between pos tags
        new_relation_ids = self.num_of_pos + 1 + relation_ids + directions
        pos_relation_ids = np.zeros(
            (pos_ids.shape[0],
             pos_ids.shape[1] + new_relation_ids.shape[1]))
        pos_relation_ids[:, w_ids] = pos_ids
        pos_relation_ids[:, rel_idxs] = new_relation_ids
        # Create index matrix with synsets and dependency relations
        # between synsets
        new_relation_ids = self.num_of_synset + 1 + relation_ids + directions
        synset_relation_ids = np.zeros(
            (synset_ids.shape[0],
             synset_ids.shape[1] + new_relation_ids.shape[1]))
        synset_relation_ids[:, w_ids] = synset_ids
        synset_relation_ids[:, rel_idxs] = new_relation_ids
        # Create index matrix with positions and dependency relations
        # between positions
        new_relation_ids = self.max_length + 1 + relation_ids + directions
        positions_1_relation_ids = np.zeros(
            (positions_1.shape[0],
             positions_1.shape[1] + new_relation_ids.shape[1]))
        positions_1_relation_ids[:, w_ids] = positions_1
        positions_1_relation_ids[:, rel_idxs] = new_relation_ids
        # Same for positions_2 — deliberately reuses the position-offset
        # new_relation_ids computed just above.
        positions_2_relation_ids = np.zeros(
            (positions_2.shape[0],
             positions_2.shape[1] + new_relation_ids.shape[1]))
        positions_2_relation_ids[:, w_ids] = positions_2
        positions_2_relation_ids[:, rel_idxs] = new_relation_ids
        start += self.batch_size
        idx += 1
        yield positions_1_relation_ids, positions_2_relation_ids, \
            word_relation_ids, sibling_relation_ids, pos_relation_ids, synset_relation_ids, relation_ids, labels