def _get_data_batch(
    self,
    data_pack: PackType,
    context_type: Type[Annotation],
    requests: Optional[DataRequest] = None,
    offset: int = 0,
) -> Iterable[Tuple[Dict[Any, Any], int]]:
    r"""Yield batches of extracted features from one data pack.

    Every entry returned by ``data_pack.get(self.scope)`` is passed,
    together with the pack, to each extractor registered in
    ``self.feature_scheme``. Entries accumulate until their count equals
    ``self.batch_size`` minus the pool size sampled when the current
    batch was started; the batch is then yielded with
    ``self.batch_is_full`` set to ``True`` for the duration of the yield.
    Entries left over after the last full batch are yielded as a final
    batch without touching ``self.batch_is_full``.

    Args:
        data_pack: The data pack to retrieve data from.
        context_type: Not used; kept only for compatibility.
        requests: Not used; kept only for compatibility.
        offset: Not used; kept only for compatibility.

    Yields:
        ``(batch, size)`` tuples. ``batch`` is
        ``{"dummy": (packs, instances, features)}``, three parallel
        lists with one item per instance, and ``size`` is the number of
        instances in the batch.
    """
    pending_packs: List[PackType] = []
    pending_instances: List[Annotation] = []
    pending_features: List[Dict[str, Feature]] = []
    pool_snapshot = self.pool_size

    # The entries are materialised into a list before any extractor runs.
    for entry in list(data_pack.get(self.scope)):
        extracted = {
            tag: scheme["extractor"].extract(data_pack, entry)
            for tag, scheme in self.feature_scheme.items()
        }
        pending_packs.append(data_pack)
        pending_instances.append(entry)
        pending_features.append(extracted)

        # Keep accumulating until the remaining capacity is exactly filled.
        if len(pending_instances) != self.batch_size - pool_snapshot:
            continue

        # The flag is raised only while the consumer holds the full batch.
        self.batch_is_full = True
        full_batch = {
            "dummy": (pending_packs, pending_instances, pending_features)
        }
        yield full_batch, len(pending_instances)
        self.batch_is_full = False

        # Start a fresh batch and re-sample the pool size, which the
        # consumer may have changed while the generator was suspended.
        pending_packs, pending_instances, pending_features = [], [], []
        pool_snapshot = self.pool_size

    # Flush the remaining data.
    if pending_instances:
        tail_batch = {
            "dummy": (pending_packs, pending_instances, pending_features)
        }
        yield tail_batch, len(pending_instances)
def _get_data_batch(
    self,
    data_pack: PackType,
) -> Iterable[Tuple[Dict[Any, Any], int]]:
    r"""Yield batches of extracted features from one data pack.

    Iterates over ``data_pack.get(self._context_type)`` and, for every
    context, calls each extractor registered in ``self._feature_scheme``
    on the pack. Contexts accumulate until their count equals
    ``self.batch_size`` minus the pool size sampled when the current
    batch was started; the batch is then yielded with
    ``self.batch_is_full`` set to ``True`` for the duration of the yield.
    Contexts left over after the last full batch are yielded as a final
    batch without touching ``self.batch_is_full``.

    Args:
        data_pack: The data pack to retrieve data from.

    Yields:
        ``(batch, size)`` tuples. ``batch`` is a dict with the keys
        ``"packs"``, ``"contexts"`` and ``"features"``, three parallel
        lists with one item per context, and ``size`` is the number of
        contexts in the batch.
    """
    pending_packs: List[PackType] = []
    pending_contexts: List[Annotation] = []
    pending_features: List[Dict[str, Feature]] = []
    pool_snapshot = self.pool_size

    for context in data_pack.get(self._context_type):
        pending_contexts.append(context)
        # NOTE(review): the extractor is given only the pack, not the
        # current context -- confirm this matches the extractor API.
        extracted = {
            tag: scheme["extractor"].extract(data_pack)
            for tag, scheme in self._feature_scheme.items()
        }
        pending_packs.append(data_pack)
        pending_features.append(extracted)

        # Keep accumulating until the remaining capacity is exactly filled.
        if len(pending_contexts) != self.batch_size - pool_snapshot:
            continue

        # The flag is raised only while the consumer holds the full batch.
        self.batch_is_full = True
        full_batch = {
            "packs": pending_packs,
            "contexts": pending_contexts,
            "features": pending_features,
        }
        yield full_batch, len(pending_contexts)
        self.batch_is_full = False

        # Start a fresh batch and re-sample the pool size, which the
        # consumer may have changed while the generator was suspended.
        pending_packs, pending_contexts, pending_features = [], [], []
        pool_snapshot = self.pool_size

    # Flush the remaining data.
    if pending_contexts:
        tail_batch = {
            "packs": pending_packs,
            "contexts": pending_contexts,
            "features": pending_features,
        }
        yield tail_batch, len(pending_contexts)