Example 1
    def __getitem__(self, index: int) -> Mapping[str, Any]:
        """Fetch a data instance at a specified index, and apply transformations to it.

        Args:
            index: Which datapoint to retrieve.

        Returns:
            The data dictionary from the specified index, with transformations applied.
        """
        items = deepcopy(
            self.dataset[index]
        )  # Deepcopy to prevent ops from overwriting values in datasets
        if isinstance(self.dataset, BatchDataset):
            # BatchDataset may randomly sample the same elements multiple times, so need to avoid reprocessing
            unique_samples = set()
            for item in items:
                if id(item) not in unique_samples:
                    forward_numpyop(self.ops, item, {'mode': self.mode})
                    unique_samples.add(id(item))
            if self.dataset.pad_value is not None:
                pad_batch(items, self.dataset.pad_value)
            items = {
                key: np.array([item[key] for item in items])
                for key in items[0]
            }
        else:
            forward_numpyop(self.ops, items, {'mode': self.mode})
        return items
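
The pattern above relies on two details: a batch built by sampling with replacement can contain the same dict object more than once, and the ops mutate each dict in place, so every underlying object should be processed exactly once. A minimal, self-contained sketch of that idea follows (square_values is a hypothetical stand-in for forward_numpyop, written only for illustration):

import numpy as np

def square_values(sample):
    # Stand-in for an in-place numpy op
    for key, value in sample.items():
        sample[key] = np.asarray(value) ** 2

a = {"x": [1, 2]}
b = {"x": [3, 4]}
batch = [a, b, a]  # 'a' was sampled twice, so the same object appears twice

seen = set()
for sample in batch:
    if id(sample) not in seen:  # skip objects that were already transformed
        square_values(sample)
        seen.add(id(sample))

# The repeated reference to 'a' already holds the transformed values, so
# stacking per key yields the batched arrays directly.
batched = {key: np.array([sample[key] for sample in batch]) for key in batch[0]}
print(batched["x"].shape)  # (3, 2)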
Example 2
    def __getitem__(self, index: int) -> Mapping[str, Any]:
        """Fetch a data instance at a specified index, and apply transformations to it.

        Args:
            index: Which datapoint to retrieve.

        Returns:
            The data dictionary from the specified index, with transformations applied.
        """
        items = deepcopy(
            self.dataset[index]
        )  # Deepcopy to prevent ops from overwriting values in datasets
        if isinstance(self.dataset, BatchDataset):
            # BatchDataset may randomly sample the same elements multiple times, so need to avoid reprocessing
            unique_samples = set()
            for item in items:
                if id(item) not in unique_samples:
                    forward_numpyop(self.ops, item, self.mode)
                    unique_samples.add(id(item))
            if self.dataset.pad_value is not None:
                pad_batch(items, self.dataset.pad_value)
            items = {
                key: np.array([item[key] for item in items])
                for key in items[0]
            }
        else:
            forward_numpyop(self.ops, items, self.mode)
        return items
Example 3
    def _pad_batch_collate(self, batch: List[MutableMapping[str, Any]]) -> Dict[str, Any]:
        """A collate function which pads a batch of data.

        Args:
            batch: The data to be batched and collated.

        Returns:
            A padded and collated batch of data.
        """
        pad_batch(batch, self.pad_value)
        return default_collate(batch)
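
A padding collate only works if pad_batch first brings every array under a given key to a common shape, so that the default collation can then stack them. Below is a hedged, numpy-only sketch of that padding step (pad_batch_sketch is written for illustration, assumes every item has the same number of dimensions per key, and is not the library's pad_batch):

import numpy as np

def pad_batch_sketch(batch, pad_value):
    # Pad every array under each key up to the largest shape seen for that key
    for key in batch[0]:
        shapes = [np.asarray(item[key]).shape for item in batch]
        target = tuple(max(dims) for dims in zip(*shapes))
        for item in batch:
            arr = np.asarray(item[key])
            pad_width = [(0, t - s) for s, t in zip(arr.shape, target)]
            item[key] = np.pad(arr, pad_width, constant_values=pad_value)

batch = [{"tokens": np.array([1, 2, 3])}, {"tokens": np.array([4, 5])}]
pad_batch_sketch(batch, pad_value=0)
stacked = {key: np.stack([item[key] for item in batch]) for key in batch[0]}
print(stacked["tokens"])  # [[1 2 3] [4 5 0]]

Here np.stack plays the role of the framework collate so the sketch stays dependency-free; in the method above, default_collate performs that final stacking.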
Example 4
    def __getitem__(self, index: int) -> Mapping[str, Any]:
        """Fetch a data instance at a specified index, and apply transformations to it.

        Args:
            index: Which datapoint to retrieve.

        Returns:
            The data dictionary from the specified index, with transformations applied.
        """
        item = self.dataset[index]
        if isinstance(item, list):
            # BatchDataset may randomly sample the same elements multiple times, so need to avoid reprocessing
            unique_samples = {}  # id: idx
            results = []
            for idx, data in enumerate(item):
                data_id = id(data)
                if data_id not in unique_samples:
                    data = _DelayedDeepDict(data)
                    forward_numpyop(self.ops, data, {'mode': self.mode})
                    data.finalize(retain=self.output_keys,
                                  deep_remainder=self.deep_remainder)
                    results.append(data)
                    unique_samples[data_id] = idx
                else:
                    results.append(results[unique_samples[data_id]])
            if hasattr(self.dataset,
                       "pad_value") and self.dataset.pad_value is not None:
                pad_batch(results, self.dataset.pad_value)
            results = {
                key: np.array([result[key] for result in results])
                for key in results[0]
            }
        else:
            results = _DelayedDeepDict(item)
            forward_numpyop(self.ops, results, {'mode': self.mode})
            results.finalize(retain=self.output_keys,
                             deep_remainder=self.deep_remainder)
        return results
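
Example 4 avoids the up-front deepcopy used in Examples 1 and 2 by wrapping the raw item in a structure that copies values lazily and decides at finalize time which keys to keep and whether untouched values still need a defensive copy. The following copy-on-read sketch illustrates the idea (LazyCopyDict is hypothetical and much simpler than the library's _DelayedDeepDict):

from copy import deepcopy

class LazyCopyDict(dict):
    """Deep-copies a value from the backing dict only on first read, so
    untouched entries never pay the copy cost."""
    def __init__(self, base):
        super().__init__()
        self._base = base

    def __missing__(self, key):
        value = deepcopy(self._base[key])  # copy on first access
        self[key] = value
        return value

    def finalize(self, retain=None, deep_remainder=True):
        # Pull in keys the ops never touched, copying them only if requested
        for key in self._base:
            if key not in self:
                self[key] = deepcopy(self._base[key]) if deep_remainder else self._base[key]
        if retain:
            for key in list(self):
                if key not in retain:
                    del self[key]

base = {"x": [1, 2, 3], "y": "label"}
data = LazyCopyDict(base)
data["x"].append(4)           # an op mutates a copy, not the backing dataset
data.finalize(retain={"x"})
print(base["x"], dict(data))  # [1, 2, 3] {'x': [1, 2, 3, 4]}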