예제 #1
0
    def _pad_and_sort_batch(self, DataLoaderBatch):
        """Collate per-sample tuples into a sorted ``BatchRepresentation``.

        Args:
            DataLoaderBatch: Sized collection of samples. Each sample must
                unpack into exactly nine fields, in the order used by the
                unpacking below (subword sequence, target, tokens, position
                changes, character sequence, per-token character counts,
                manual features, document id, segment id).

        Returns:
            The ``BatchRepresentation`` built from the batch, after its
            ``sort_batch()`` method has been called.
        """
        sample_count = len(DataLoaderBatch)

        # Transpose the batch: one tuple per field instead of one per sample.
        columns = tuple(zip(*DataLoaderBatch))
        (subwords, labels, words, position_shifts, characters,
         characters_per_token, features, doc_ids, seg_ids) = columns

        # Targets are dropped for test runs unless evaluation was requested.
        withhold_targets = (
            not self._arguments_service.evaluate
            and self._run_type == RunType.Test)
        if withhold_targets:
            labels = None

        padding_index = self._ner_process_service.pad_idx

        # Pair every document id with the segment id of the same sample.
        doc_segment_pairs = list(zip(doc_ids, seg_ids))

        collated = BatchRepresentation(
            device=self._device,
            batch_size=sample_count,
            subword_sequences=subwords,
            character_sequences=characters,
            subword_characters_count=characters_per_token,
            targets=labels,
            tokens=words,
            position_changes=position_shifts,
            manual_features=features,
            additional_information=doc_segment_pairs,
            pad_idx=padding_index)

        # Ordering is delegated entirely to the batch object.
        collated.sort_batch()
        return collated
예제 #2
0
    def collate_function(self, batch_input):
        """Collate ``(sequence, target)`` samples into a sorted batch.

        Args:
            batch_input: Sized collection of samples, each unpacking into
                exactly two fields: a sequence and its target.

        Returns:
            The ``BatchRepresentation`` built from the batch, after its
            ``sort_batch()`` method has been called.
        """
        sample_count = len(batch_input)

        # Transpose the samples into one tuple per field.
        columns = tuple(zip(*batch_input))
        subwords, labels = columns

        collated = BatchRepresentation(
            device=self._device,
            batch_size=sample_count,
            subword_sequences=subwords,
            targets=labels)

        collated.sort_batch()
        return collated
    def _pad_and_sort_batch(self, DataLoaderBatch):
        """Collate ``(context word ids, target)`` samples into a sorted batch.

        Args:
            DataLoaderBatch: Sized collection of samples, each unpacking
                into exactly two fields: context word ids and a target.

        Returns:
            The ``BatchRepresentation`` built from the batch, after its
            ``sort_batch()`` method has been called.
        """
        sample_count = len(DataLoaderBatch)

        # Transpose the samples into one tuple per field.
        columns = tuple(zip(*DataLoaderBatch))
        context_ids, labels = columns

        # Targets are handed over as a list rather than the tuple zip yields.
        label_list = list(labels)

        collated = BatchRepresentation(
            device=self._device,
            batch_size=sample_count,
            word_sequences=context_ids,
            targets=label_list,
            pad_idx=self._cbow_process_service._pad_idx)

        collated.sort_batch()
        return collated
예제 #4
0
    def _pad_and_sort_batch(self, DataLoaderBatch):
        """Collate four-field samples into a sorted ``BatchRepresentation``.

        Args:
            DataLoaderBatch: Sized collection of samples, each unpacking
                into exactly four fields: a subword sequence, a field that
                is ignored here, the OCR text and the GS text.

        Returns:
            The ``BatchRepresentation`` built from the batch, after its
            ``sort_batch()`` method has been called.
        """
        sample_count = len(DataLoaderBatch)

        # Transpose the samples; the second field is deliberately unused.
        columns = tuple(zip(*DataLoaderBatch))
        subwords, _, ocr_column, gs_column = columns

        collated = BatchRepresentation(
            device=self._device,
            batch_size=sample_count,
            subword_sequences=subwords,
            character_sequences=ocr_column,
            targets=gs_column,
            offset_lists=None)  # TODO add offset lists

        collated.sort_batch()
        return collated
예제 #5
0
    def collate_function(self, batch_input):
        """Collate five-field samples into a sorted ``BatchRepresentation``.

        Args:
            batch_input: Sized collection of samples, each unpacking into
                exactly five fields: a subword sequence, the OCR text, the
                GS text, an offset list and the tokens.

        Returns:
            The ``BatchRepresentation`` built from the batch, after its
            ``sort_batch()`` method has been called.
        """
        sample_count = len(batch_input)

        # Transpose the samples into one tuple per field.
        columns = tuple(zip(*batch_input))
        subwords, ocr_column, gs_column, offsets, words = columns

        collated = BatchRepresentation(
            device=self._device,
            batch_size=sample_count,
            subword_sequences=subwords,
            character_sequences=ocr_column,
            targets=gs_column,
            tokens=words,
            offset_lists=offsets)

        collated.sort_batch()
        return collated