def encode_and_pad(self, data_batches, word2id_dictionary):
    """Encode every batch with the vocabulary, then pad it.

    Each batch is passed through ``hF.encode_batch`` together with
    ``word2id_dictionary`` and then through ``self.pad_batch``. The padded
    batch is written back into ``data_batches`` at the same index, so the
    caller's sequence is modified in place (and also returned).

    Args:
        data_batches: Indexable, mutable sequence of batches. After encoding,
            each batch is treated as a sequence of documents, each document a
            sequence of sentences, each sentence a sized sequence.
        word2id_dictionary: Vocabulary forwarded unchanged to
            ``hF.encode_batch``. Presumably maps words to integer ids;
            verify against ``hF.encode_batch``.

    Returns:
        A 5-tuple ``(data_batches, max_sentences, max_length,
        no_padding_sentences, no_padding_lengths)``. The four lists hold one
        entry per batch, in batch order:
            * largest number of sentences in any document of the batch,
            * largest sentence length in the batch,
            * second value returned by ``hF.pad_batch_with_sentences``,
            * second value returned by ``hF.pad_batch_sequences``.
    """
    print('Encoding Data...')
    max_sentences = []
    max_length = []
    no_padding_sentences = []
    no_padding_lengths = []

    # Wrap the sequence itself rather than the enumerate object: enumerate has
    # no len(), so tqdm(enumerate(...)) cannot display a total or an ETA.
    for index, raw_batch in enumerate(tqdm(data_batches)):
        encoded_batch = hF.encode_batch(raw_batch, word2id_dictionary)

        # The padding steps are exactly what pad_batch does; reuse it instead
        # of keeping a second copy of that logic here.
        (padded_batch,
         batch_max_sentences,
         batch_max_length,
         batch_no_padding_sentences,
         batch_no_padding_lengths) = self.pad_batch(encoded_batch)

        max_sentences.append(batch_max_sentences)
        max_length.append(batch_max_length)
        no_padding_sentences.append(batch_no_padding_sentences)
        no_padding_lengths.append(batch_no_padding_lengths)
        data_batches[index] = padded_batch

    return (data_batches, max_sentences, max_length,
            no_padding_sentences, no_padding_lengths)
def pad_batch(self, data_batch):
    """Pad one batch along the sentence axis and then along the token axis.

    The batch is treated as a sequence of documents, each document a sequence
    of sentences, each sentence a sized sequence. The largest sentence count
    and the largest sentence length found in the batch are used as padding
    targets for ``hF.pad_batch_with_sentences`` and
    ``hF.pad_batch_sequences`` respectively.

    Args:
        data_batch: Sequence of documents (see above).

    Returns:
        A 5-tuple ``(data_batch, max_num_sentences, max_sentences_length,
        no_padding_num_sentences, no_padding_sentence_lengths)`` where
        ``data_batch`` is the output of the two padding helpers and the last
        two items are the second values returned by
        ``hF.pad_batch_with_sentences`` and ``hF.pad_batch_sequences``.
        Presumably these are the pre-padding sizes; verify against ``hF``.
    """
    sentence_counts = [len(document) for document in data_batch]

    # default=0 keeps an empty batch from raising ValueError here.
    # NOTE(review): confirm the hF helpers accept an empty batch / a target
    # size of 0; their behaviour is not visible from this file.
    max_num_sentences = max(sentence_counts, default=0)

    # Take the maximum over all sentences of all documents at once. The
    # previous per-document max() raised ValueError as soon as one document
    # in the batch contained no sentences.
    max_sentences_length = max(
        (len(sentence) for document in data_batch for sentence in document),
        default=0,
    )

    data_batch, no_padding_num_sentences = hF.pad_batch_with_sentences(
        data_batch, max_num_sentences)
    data_batch, no_padding_sentence_lengths = hF.pad_batch_sequences(
        data_batch, max_sentences_length)

    return (data_batch, max_num_sentences, max_sentences_length,
            no_padding_num_sentences, no_padding_sentence_lengths)
def pad(self, data_batches):
    """Pad every batch in ``data_batches``.

    Each batch is passed through ``self.pad_batch`` and the padded result is
    written back into ``data_batches`` at the same index, so the caller's
    sequence is modified in place (and also returned).

    Args:
        data_batches: Indexable, mutable sequence of batches. Each batch is
            treated as a sequence of documents, each document a sequence of
            sentences, each sentence a sized sequence.

    Returns:
        A 5-tuple ``(data_batches, max_sentences, max_length,
        no_padding_sentences, no_padding_lengths)``. The four lists hold one
        entry per batch, in batch order:
            * largest number of sentences in any document of the batch,
            * largest sentence length in the batch,
            * second value returned by ``hF.pad_batch_with_sentences``,
            * second value returned by ``hF.pad_batch_sequences``.
    """
    print('padding Data...')
    max_sentences = []
    max_length = []
    no_padding_sentences = []
    no_padding_lengths = []

    # Wrap the sequence itself rather than the enumerate object: enumerate has
    # no len(), so tqdm(enumerate(...)) cannot display a total or an ETA.
    for index, unpadded_batch in enumerate(tqdm(data_batches)):
        # Per-batch padding lives in pad_batch; reuse it instead of keeping a
        # second copy of the same steps here.
        (padded_batch,
         batch_max_sentences,
         batch_max_length,
         batch_no_padding_sentences,
         batch_no_padding_lengths) = self.pad_batch(unpadded_batch)

        max_sentences.append(batch_max_sentences)
        max_length.append(batch_max_length)
        no_padding_sentences.append(batch_no_padding_sentences)
        no_padding_lengths.append(batch_no_padding_lengths)
        data_batches[index] = padded_batch

    return (data_batches, max_sentences, max_length,
            no_padding_sentences, no_padding_lengths)