def to_iter(dataset: TabularDataset, pad_ix: int, batch_size: int) -> BucketIterator:
    """Build a length-sorted ``BucketIterator`` and record unpadded sentence lengths.

    The iterator is walked once, up front. For every batch the number of
    non-pad entries in each column of ``batch.sentence`` is computed, and the
    resulting per-batch lists are stored on the iterator as ``sent_lengths``
    (one inner list per batch, in iteration order).

    Args:
        dataset: Dataset whose examples expose a ``sentence`` attribute.
        pad_ix: Value in ``batch.sentence`` that marks padding.
        batch_size: Number of examples per batch.

    Returns:
        The ``BucketIterator``, with an extra ``sent_lengths`` attribute: a
        list (one entry per batch) of lists of ``int`` lengths.
    """
    # sort_within_batch=True is needed for packing the padded sequences.
    # NOTE(review): device=-1 looks like the legacy torchtext spelling for
    # "CPU" -- confirm against the installed torchtext version.
    data_iter = BucketIterator(
        dataset,
        sort=True,
        batch_size=batch_size,
        device=-1,
        sort_within_batch=True,
        sort_key=lambda x: len(x.sentence),
        shuffle=False,
    )

    data_iter.sent_lengths = []
    for batch in data_iter:
        # batch.sentence is indexed as [position, example]: dim 0 is the
        # padded length shared by the batch, dim 1 enumerates the examples.
        sentences = batch.sentence
        padded_len = sentences.shape[0]
        # Must subtract the number of pads from the padded length. Reducing
        # over dim 0 handles every example in the batch at once.
        pad_counts = (sentences == pad_ix).sum(dim=0)
        data_iter.sent_lengths.append((padded_len - pad_counts).tolist())
    return data_iter