def _store_reader( store: Store, config: StoreConfigMap, labels: tp.Iterator[tp.Hashable], max_persist: tp.Optional[int], ) -> FrameIterType: ''' Read as many labels as possible from Store, then yield back each one at a time. If max_persist is active, max_persist will set the maximum number of Frame to load per read. Using Store.read_many is shown to have significant performance benefits on large collections of Frame. ''' if max_persist is None: for frame in store.read_many(labels, config=config): yield frame elif max_persist > 1: coll = [] for label in labels: coll.append(label) # try to collect max_persist-sized bundles in coll, then use read_many to get all at once, then clear if we have more to iter if len(coll) == max_persist: for frame in store.read_many(coll, config=config): yield frame coll.clear() if coll: # less than max persist remaining for frame in store.read_many(coll, config=config): yield frame else: # max persist is 1 for label in labels: yield store.read(label, config=config[labels])
def _from_store(
        cls,
        store: Store,
        config: StoreConfigMapInitializer = None,
        ) -> 'Batch':
    '''
    Construct a Batch by reading one Frame per label found in ``store``.

    Args:
        store: the Store supplying labels and Frames.
        config: a StoreConfigMapInitializer resolved to a map for per-label reads.
    '''
    cmap = StoreConfigMap.from_initializer(config)
    # lazily pair each label with its Frame, looking up per-label config
    pairs = (
            (label, store.read(label, config=cmap[label]))
            for label in store.labels()
            )
    # NOTE: the original initializer (not the resolved map) is forwarded
    return cls(pairs, config=config)
def _from_store(
        cls,
        store: Store,
        config: StoreConfigMapInitializer = None,
        max_persist: tp.Optional[int] = None, # not used
        ) -> 'Batch':
    '''
    Construct a Batch by reading one Frame per label found in ``store``.

    Args:
        store: the Store supplying labels and Frames.
        config: a StoreConfigMapInitializer resolved to a map for per-label reads.
        max_persist: accepted for interface compatibility only; all Frames are read eagerly here.
    '''
    cmap = StoreConfigMap.from_initializer(config)
    # lazily pair each label with its Frame, looking up per-label config
    pairs = (
            (label, store.read(label, config=cmap[label]))
            for label in store.labels()
            )
    # NOTE: the original initializer (not the resolved map) is forwarded
    return cls(pairs, config=config)
def test_store_get_field_names_and_dtypes_d(self) -> None:
    from static_frame.core.index_hierarchy import IndexHierarchy

    # hierarchical columns carrying a tuple name on the hierarchy
    ih = IndexHierarchy.from_labels(
            ((1, 'a'), (1, 'b'), (2, 'c')),
            name=('foo', 'bar'))
    frame = Frame.from_records(
            (('a', True, None),),
            index=(('a',)),
            columns=ih)

    common = dict(
            frame=frame,
            include_index=True,
            include_index_name=False,
            include_columns=True,
            include_columns_name=True,
            )

    names, dts = Store.get_field_names_and_dtypes(**common)
    self.assertEqual(names,
            [('foo', 'bar'), "[1 'a']", "[1 'b']", "[2 'c']"])
    self.assertTrue(len(names) == len(dts))

    # with force_brackets the columns name is rendered in bracketed form
    names, dts = Store.get_field_names_and_dtypes(force_brackets=True, **common)
    self.assertEqual(names,
            ["['foo' 'bar']", "[1 'a']", "[1 'b']", "[2 'c']"])

    # including the index without either name flag set consistently raises
    with self.assertRaises(StoreParameterConflict):
        Store.get_field_names_and_dtypes(
                frame=frame,
                include_index=True,
                include_index_name=False,
                include_columns=True,
                include_columns_name=False,
                )
    with self.assertRaises(StoreParameterConflict):
        Store.get_field_names_and_dtypes(
                frame=frame,
                include_index=False,
                include_index_name=False,
                include_columns=True,
                include_columns_name=True,
                )
def _from_store(cls, store: Store, config: StoreConfigMapInitializer = None ) -> 'Bus': return cls(cls._deferred_series(store.labels()), store=store, config=config )
def test_store_get_field_names_and_dtypes_c(self) -> None:
    frame = Frame.from_records(
            (('a', True, None),),
            index=(('a',)),
            columns=(('x', 'y', 'z'))).rename(columns='foo')

    # requesting the index name while excluding the index is a conflict
    with self.assertRaises(StoreParameterConflict):
        Store.get_field_names_and_dtypes(frame=frame,
                include_index=False,
                include_index_name=True,
                include_columns=True,
                include_columns_name=True,
                )

    # with the index included, the columns name leads the field names
    names, dts = Store.get_field_names_and_dtypes(frame=frame,
            include_index=True,
            include_index_name=False,
            include_columns=True,
            include_columns_name=True,
            )
    self.assertEqual(names, ['foo', 'x', 'y', 'z'])
    self.assertTrue(len(names) == len(dts))
def test_store_get_field_names_and_dtypes_b(self) -> None:
    frame = Frame.from_records(
            (('a', True, None),),
            index=(('a',)),
            columns=(('x', 'y', 'z')))

    # index excluded: only column labels become field names
    names, dts = Store.get_field_names_and_dtypes(frame=frame,
            include_index=False,
            include_columns=True)
    self.assertEqual(names.tolist(), ['x', 'y', 'z']) #type: ignore
    self.assertEqual(dts.tolist(), #type: ignore
            [np.dtype('<U1'), np.dtype('bool'), np.dtype('O')])
def _from_store(cls, store: Store, *, config: StoreConfigMapInitializer = None, max_persist: tp.Optional[int], ) -> 'Bus': return cls(cls._deferred_series(store.labels()), store=store, config=config, max_persist=max_persist, )
def _from_store(
        cls,
        store: Store,
        *,
        config: StoreConfigMapInitializer = None,
        max_workers: tp.Optional[int] = None,
        chunksize: int = 1,
        use_threads: bool = False,
        ) -> 'Batch':
    '''
    Construct a Batch by reading one Frame per label found in ``store``,
    forwarding parallel-execution options to the Batch.

    Args:
        store: the Store supplying labels and Frames.
        config: a StoreConfigMapInitializer resolved to a map for per-label reads.
        max_workers: forwarded to the Batch for parallel processing.
        chunksize: forwarded to the Batch for parallel processing.
        use_threads: forwarded to the Batch for parallel processing.
    '''
    cmap = StoreConfigMap.from_initializer(config)
    # lazily pair each label with its Frame; the resolved map is also used for label discovery
    pairs = (
            (label, store.read(label, config=cmap[label]))
            for label in store.labels(config=cmap)
            )
    return cls(pairs,
            config=config,
            max_workers=max_workers,
            chunksize=chunksize,
            use_threads=use_threads,
            )
def test_store_get_field_names_and_dtypes_a(self) -> None:
    frame = Frame.from_records(
            (('a', True, None),),
            index=(('a',)),
            columns=(('x', 'y', 'z')))

    # with neither index nor columns included, field names are positional
    names, dts = Store.get_field_names_and_dtypes(frame=frame,
            include_index=False,
            include_index_name=True,
            include_columns=False,
            include_columns_name=False,
            )
    self.assertEqual(names, range(0, 3))
    self.assertEqual(dts, [np.dtype('<U1'), np.dtype('bool'), np.dtype('O')])
def _from_store(cls, store: Store, *, config: StoreConfigMapInitializer = None, max_persist: tp.Optional[int] = None, index_constructor: IndexConstructor = None, ) -> 'Bus': return cls(cls._deferred_series( store.labels(config=config), index_constructor=index_constructor, ), store=store, config=config, max_persist=max_persist, own_data=True, )
def _from_store( cls, store: Store, *, config: StoreConfigMapInitializer = None, max_persist: tp.Optional[int] = None, index_constructor: IndexConstructor = None, ) -> 'Bus': return cls( None, # will generate FrameDeferred array index=store.labels(config=config), index_constructor=index_constructor, store=store, config=config, max_persist=max_persist, own_data=True, )