Code Example #1
    def encode_and_pad(self, data_batches, word2id_dictionary):
        """Encode every batch with the word-to-id dictionary, then pad it."""
        #################### Prepare Training data ####################
        print('Encoding Data...')
        max_sentences = []          # max number of sentences per batch
        max_length = []             # max sentence length (in tokens) per batch
        no_padding_sentences = []   # original sentence counts, before padding
        no_padding_lengths = []     # original sentence lengths, before padding
        for index, batch in tqdm(enumerate(data_batches)):
            # Map every token to its integer id.
            batch = hF.encode_batch(batch, word2id_dictionary)

            # Per-document sentence counts and per-sentence token counts.
            num_sentences = [len(x) for x in batch]
            sentence_lengths = [[len(x) for x in y] for y in batch]
            max_num_sentences = max(num_sentences)
            max_sentences_length = max([max(x) for x in sentence_lengths])

            # Pad every document to the same number of sentences, then pad
            # every sentence to the same number of tokens.
            batch, no_padding_num_sentences = hF.pad_batch_with_sentences(
                batch, max_num_sentences)
            batch, no_padding_sentence_lengths = hF.pad_batch_sequences(
                batch, max_sentences_length)

            max_sentences.append(max_num_sentences)
            max_length.append(max_sentences_length)
            no_padding_sentences.append(no_padding_num_sentences)
            no_padding_lengths.append(no_padding_sentence_lengths)
            data_batches[index] = batch
        ################################################################
        return data_batches, max_sentences, max_length, no_padding_sentences, no_padding_lengths
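
These methods rely on a helper module imported as hF (plus tqdm), which is not shown in this section. The following is a minimal sketch of what encode_batch, pad_batch_with_sentences, and pad_batch_sequences could look like, inferred purely from how they are called above; the function bodies, the PAD/UNK ids, and the exact return values are assumptions, not the project's actual implementation.

# Assumed stand-ins for the hF helpers used above (not the real module).
from tqdm import tqdm  # progress wrapper used in encode_and_pad / pad

PAD_ID = 0  # assumed id reserved for padding tokens
UNK_ID = 1  # assumed id for out-of-vocabulary tokens

def encode_batch(batch, word2id_dictionary):
    """Replace every token of every sentence with its integer id."""
    return [[[word2id_dictionary.get(token, UNK_ID) for token in sentence]
             for sentence in document]
            for document in batch]

def pad_batch_with_sentences(batch, max_num_sentences):
    """Append empty sentences so every document has max_num_sentences.
    Also return the original sentence count of each document."""
    no_padding_num_sentences = [len(document) for document in batch]
    padded = [document + [[] for _ in range(max_num_sentences - len(document))]
              for document in batch]
    return padded, no_padding_num_sentences

def pad_batch_sequences(batch, max_sentences_length):
    """Append PAD_ID so every sentence has max_sentences_length tokens.
    Also return the original length of each sentence."""
    no_padding_sentence_lengths = [[len(s) for s in document] for document in batch]
    padded = [[sentence + [PAD_ID] * (max_sentences_length - len(sentence))
               for sentence in document]
              for document in batch]
    return padded, no_padding_sentence_lengths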
Code Example #2
    def pad_batch(self, data_batch):
        """Pad a single batch: same sentence count per document,
        same token count per sentence."""
        num_sentences = [len(x) for x in data_batch]
        sentence_lengths = [[len(x) for x in y] for y in data_batch]
        max_num_sentences = max(num_sentences)
        max_sentences_length = max([max(x) for x in sentence_lengths])

        # Pad documents to max_num_sentences, then sentences to max_sentences_length.
        data_batch, no_padding_num_sentences = hF.pad_batch_with_sentences(
            data_batch, max_num_sentences)
        data_batch, no_padding_sentence_lengths = hF.pad_batch_sequences(
            data_batch, max_sentences_length)

        ##########################################
        return data_batch, max_num_sentences, max_sentences_length, no_padding_num_sentences, no_padding_sentence_lengths
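
A hedged usage sketch for pad_batch: the input is assumed to be a list of documents, each a list of already-encoded sentences (lists of token ids). The name prep and the toy batch below are illustrative only, not part of the original code.

# Illustrative only: `prep` is a hypothetical instance of the class that
# defines pad_batch; the batch contents are toy token ids.
batch = [
    [[4, 8, 15], [16, 23]],   # document with 2 sentences
    [[42]],                   # document with 1 sentence
]
(padded, max_num_sentences, max_sentences_length,
 no_pad_sentences, no_pad_lengths) = prep.pad_batch(batch)
# With the sketch helpers above: max_num_sentences == 2, max_sentences_length == 3,
# so every document ends up with 2 sentences of 3 ids each, while
# no_pad_sentences == [2, 1] keeps the original sentence counts.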
Code Example #3
    def pad(self, data_batches):
        """Pad every batch that has already been encoded, without re-encoding."""
        print('Padding Data...')
        max_sentences = []          # max number of sentences per batch
        max_length = []             # max sentence length (in tokens) per batch
        no_padding_sentences = []   # original sentence counts, before padding
        no_padding_lengths = []     # original sentence lengths, before padding
        for index, batch in tqdm(enumerate(data_batches)):
            num_sentences = [len(x) for x in batch]
            sentence_lengths = [[len(x) for x in y] for y in batch]
            max_num_sentences = max(num_sentences)
            max_sentences_length = max([max(x) for x in sentence_lengths])

            # Pad documents to max_num_sentences, then sentences to max_sentences_length.
            batch, no_padding_num_sentences = hF.pad_batch_with_sentences(
                batch, max_num_sentences)
            batch, no_padding_sentence_lengths = hF.pad_batch_sequences(
                batch, max_sentences_length)

            max_sentences.append(max_num_sentences)
            max_length.append(max_sentences_length)
            no_padding_sentences.append(no_padding_num_sentences)
            no_padding_lengths.append(no_padding_sentence_lengths)
            data_batches[index] = batch
        ##########################################
        return data_batches, max_sentences, max_length, no_padding_sentences, no_padding_lengths
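
To tie the three examples together, here is a hedged end-to-end sketch of encode_and_pad, which combines the encoding step with the same per-batch padding that pad performs. The instance name prep, the toy vocabulary, and the raw batch contents are assumptions for illustration; ids 0 and 1 are assumed to be reserved for padding and unknown tokens.

# Illustrative only: `prep` is a hypothetical instance, and the vocabulary and
# raw batches are toy data.
word2id = {'the': 2, 'cat': 3, 'sat': 4}
data_batches = [
    [[['the', 'cat'], ['sat']],        # document with 2 sentences
     [['the', 'cat', 'sat']]],         # document with 1 sentence
]
(batches, max_sents, max_len,
 no_pad_sents, no_pad_lens) = prep.encode_and_pad(data_batches, word2id)
# For batch 0: max_sents[0] == 2 and max_len[0] == 3, so each document now has
# 2 sentences of 3 ids each; no_pad_sents[0] == [2, 1] records the sentence
# counts before padding, and no_pad_lens[0] the sentence lengths before
# token padding.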