def _pad_and_sort_batch(self, DataLoaderBatch):
    """Collate a list of samples into one sorted ``BatchRepresentation``.

    Every sample must unpack into nine fields. After transposing the batch,
    the fields are forwarded to ``BatchRepresentation`` as, in order:
    ``subword_sequences``, ``targets``, ``tokens``, ``position_changes``,
    ``character_sequences``, ``subword_characters_count``,
    ``manual_features`` and finally the document id / segment id, which are
    paired up per sample into ``additional_information``.

    Targets are replaced by ``None`` when the ``evaluate`` argument is falsy
    and the current run type is ``RunType.Test``.

    Args:
        DataLoaderBatch: Sized collection of nine-field samples.

    Returns:
        The ``BatchRepresentation`` built from the batch, after
        ``sort_batch()`` has been called on it.

    Raises:
        ValueError: If the batch does not transpose into exactly nine
            columns (this includes an empty batch).
    """
    batch_size = len(DataLoaderBatch)

    # Transpose: list of per-sample tuples -> one tuple per field.
    columns = list(zip(*DataLoaderBatch))
    (
        subword_ids,
        labels,
        token_items,
        pos_changes,
        char_ids,
        chars_per_token,
        manual_features,
        doc_ids,
        seg_ids,
    ) = columns

    # Test runs that are not being evaluated are built without targets.
    skip_targets = (
        not self._arguments_service.evaluate
        and self._run_type == RunType.Test
    )
    if skip_targets:
        labels = None

    padding_index = self._ner_process_service.pad_idx

    batch = BatchRepresentation(
        device=self._device,
        batch_size=batch_size,
        subword_sequences=subword_ids,
        character_sequences=char_ids,
        subword_characters_count=chars_per_token,
        targets=labels,
        tokens=token_items,
        position_changes=pos_changes,
        manual_features=manual_features,
        # One (document id, segment id) pair per sample.
        additional_information=list(zip(doc_ids, seg_ids)),
        pad_idx=padding_index,
    )
    batch.sort_batch()
    return batch
def collate_function(self, batch_input):
    """Collate ``(sequence, target)`` samples into a ``BatchRepresentation``.

    Args:
        batch_input: Sized collection of two-field samples.

    Returns:
        The ``BatchRepresentation`` built from the transposed batch, after
        ``sort_batch()`` has been called on it.

    Raises:
        ValueError: If the batch does not transpose into exactly two
            columns (this includes an empty batch).
    """
    sample_count = len(batch_input)

    # Transpose: list of (sequence, target) pairs -> two parallel tuples.
    columns = list(zip(*batch_input))
    subword_ids, labels = columns

    batch = BatchRepresentation(
        device=self._device,
        batch_size=sample_count,
        subword_sequences=subword_ids,
        targets=labels,
    )
    batch.sort_batch()
    return batch
def _pad_and_sort_batch(self, DataLoaderBatch):
    """Collate ``(context_word_ids, target)`` samples into a sorted batch.

    The context word ids are passed to ``BatchRepresentation`` as
    ``word_sequences``; the targets are converted from a tuple to a list
    before being passed on. The padding index is read from the CBOW process
    service.

    Args:
        DataLoaderBatch: Sized collection of two-field samples.

    Returns:
        The ``BatchRepresentation`` built from the batch, after
        ``sort_batch()`` has been called on it.

    Raises:
        ValueError: If the batch does not transpose into exactly two
            columns (this includes an empty batch).
    """
    sample_count = len(DataLoaderBatch)

    # Transpose: list of per-sample pairs -> two parallel tuples.
    columns = list(zip(*DataLoaderBatch))
    word_ids, labels = columns

    batch = BatchRepresentation(
        device=self._device,
        batch_size=sample_count,
        word_sequences=word_ids,
        targets=list(labels),
        pad_idx=self._cbow_process_service._pad_idx,
    )
    batch.sort_batch()
    return batch
def _pad_and_sort_batch(self, DataLoaderBatch):
    """Collate four-field samples into a sorted ``BatchRepresentation``.

    Each sample must unpack into four fields. The first is passed on as
    ``subword_sequences``, the second is ignored, the third as
    ``character_sequences`` and the fourth as ``targets``. Offset lists are
    not supplied yet (``offset_lists=None``).

    Args:
        DataLoaderBatch: Sized collection of four-field samples.

    Returns:
        The ``BatchRepresentation`` built from the batch, after
        ``sort_batch()`` has been called on it.

    Raises:
        ValueError: If the batch does not transpose into exactly four
            columns (this includes an empty batch).
    """
    sample_count = len(DataLoaderBatch)

    # Transpose: list of per-sample tuples -> one tuple per field.
    columns = list(zip(*DataLoaderBatch))
    subword_ids, _unused, ocr_items, gs_items = columns

    batch = BatchRepresentation(
        device=self._device,
        batch_size=sample_count,
        subword_sequences=subword_ids,
        character_sequences=ocr_items,
        targets=gs_items,
        offset_lists=None,  # TODO add offset lists
    )
    batch.sort_batch()
    return batch
def collate_function(self, batch_input):
    """Collate five-field samples into a sorted ``BatchRepresentation``.

    Each sample must unpack into five fields, which are forwarded to
    ``BatchRepresentation`` as, in order: ``subword_sequences``,
    ``character_sequences``, ``targets``, ``offset_lists`` and ``tokens``.

    Args:
        batch_input: Sized collection of five-field samples.

    Returns:
        The ``BatchRepresentation`` built from the batch, after
        ``sort_batch()`` has been called on it.

    Raises:
        ValueError: If the batch does not transpose into exactly five
            columns (this includes an empty batch).
    """
    sample_count = len(batch_input)

    # Transpose: list of per-sample tuples -> one tuple per field.
    columns = list(zip(*batch_input))
    subword_ids, ocr_items, gs_items, offsets, token_items = columns

    batch = BatchRepresentation(
        device=self._device,
        batch_size=sample_count,
        subword_sequences=subword_ids,
        character_sequences=ocr_items,
        targets=gs_items,
        tokens=token_items,
        offset_lists=offsets,
    )
    batch.sort_batch()
    return batch