Example #1
0
    def _get_data_batch(
        self,
        data_pack: PackType,
        context_type: Type[Annotation],
        requests: Optional[DataRequest] = None,
        offset: int = 0,
    ) -> Iterable[Tuple[Dict[Any, Any], int]]:
        r"""Yield batches of extracted features from one data pack.

        Every entry returned by ``data_pack.get(self.scope)`` is passed,
        together with the pack, to the ``"extractor"`` of each scheme in
        ``self.feature_scheme``. The pack, the entry and the resulting
        ``tag -> feature`` dict are buffered in three parallel lists. A
        batch is emitted as soon as the number of buffered entries equals
        ``self.batch_size`` minus the pool size sampled from
        ``self.pool_size``; whatever is left over after the last entry is
        emitted as a final, possibly smaller, batch.

        ``self.batch_is_full`` is set to ``True`` right before a full batch
        is yielded and reset to ``False`` once the generator is resumed. It
        is not touched for the final leftover batch.

        Args:
            data_pack: The data pack to retrieve data from.
            context_type: The context type of the data pack.
                This is not used and is only for compatibility reasons.
            requests: The request detail.
                This is not used and is only for compatibility reasons.
            offset: The offset for get_data.
                This is not used and is only for compatibility reasons.

        Yields:
            A ``(batch, size)`` pair, where ``batch`` is a dict whose only
            key ``"dummy"`` maps to the tuple
            ``(packs, instances, features_collection)`` and ``size`` is the
            number of instances in that batch.
        """
        pack_buf: List[PackType] = []
        instance_buf: List[Annotation] = []
        feature_buf: List[Dict[str, Feature]] = []
        pooled = self.pool_size

        # The entries are materialised into a list before extraction starts.
        for instance in list(data_pack.get(self.scope)):
            extracted = {
                tag: scheme["extractor"].extract(data_pack, instance)
                for tag, scheme in self.feature_scheme.items()
            }
            pack_buf.append(data_pack)
            instance_buf.append(instance)
            feature_buf.append(extracted)

            # Keep buffering until the remaining capacity is exactly filled.
            if len(instance_buf) != self.batch_size - pooled:
                continue

            self.batch_is_full = True
            yield (
                {"dummy": (pack_buf, instance_buf, feature_buf)},
                len(instance_buf),
            )
            self.batch_is_full = False

            # Rebind to fresh lists so the batch already handed out is not
            # mutated, then re-sample the pool size for the next batch.
            pack_buf, instance_buf, feature_buf = [], [], []
            pooled = self.pool_size

        # Flush the remaining data.
        if instance_buf:
            yield (
                {"dummy": (pack_buf, instance_buf, feature_buf)},
                len(instance_buf),
            )
Example #2
0
    def _get_data_batch(
        self,
        data_pack: PackType,
    ) -> Iterable[Tuple[Dict[Any, Any], int]]:
        r"""Yield batches of contexts and extracted features from a pack.

        Each entry returned by ``data_pack.get(self._context_type)`` is
        buffered as a context. For every such entry, the ``"extractor"`` of
        each scheme in ``self._feature_scheme`` is called with the data pack
        and the results are stored in a ``tag -> feature`` dict. A batch is
        emitted as soon as the number of buffered contexts equals
        ``self.batch_size`` minus the pool size sampled from
        ``self.pool_size``; any leftover contexts are emitted as a final,
        possibly smaller, batch.

        ``self.batch_is_full`` is set to ``True`` right before a full batch
        is yielded and reset to ``False`` once the generator is resumed. It
        is not touched for the final leftover batch.

        Args:
            data_pack: The data pack to retrieve data from.

        Yields:
            A ``(batch, size)`` pair, where ``batch`` is a dict with the
            keys ``"packs"``, ``"contexts"`` and ``"features"`` (three
            parallel lists) and ``size`` is the number of contexts in it.
        """
        pack_buf: List[PackType] = []
        context_buf: List[Annotation] = []
        feature_buf: List[Dict[str, Feature]] = []
        pooled = self.pool_size

        for context in data_pack.get(self._context_type):
            context_buf.append(context)
            extracted = {
                tag: scheme["extractor"].extract(data_pack)
                for tag, scheme in self._feature_scheme.items()
            }
            pack_buf.append(data_pack)
            feature_buf.append(extracted)

            # Keep buffering until the remaining capacity is exactly filled.
            if len(context_buf) != self.batch_size - pooled:
                continue

            self.batch_is_full = True
            full_batch = dict(
                packs=pack_buf,
                contexts=context_buf,
                features=feature_buf,
            )
            yield full_batch, len(context_buf)
            self.batch_is_full = False

            # Rebind to fresh lists so the batch already handed out is not
            # mutated, then re-sample the pool size for the next batch.
            pack_buf, context_buf, feature_buf = [], [], []
            pooled = self.pool_size

        # Flush the remaining data.
        if context_buf:
            tail_batch = dict(
                packs=pack_buf,
                contexts=context_buf,
                features=feature_buf,
            )
            yield tail_batch, len(context_buf)