def _put_chunk(chunk_key, data, spill_times=1):
    """
    Store one chunk and register it with the cache, spilling and retrying
    whenever the store reports that it is full.

    The amount requested from ``spill_size`` is
    ``2 * spill_times * calc_data_size(data)``, and the retry is scheduled
    with ``2 * spill_times``, so the request doubles on every attempt.

    :param chunk_key: key under which the chunk is stored and registered
    :param data: payload handed to the chunk store
    :param spill_times: multiplier applied to the spill request
    :return: ``None`` when the chunk was stored; otherwise the result of
        ``spill_size(...).then(...)`` (presumably a promise for the retry --
        confirm against the promise implementation)
    """
    try:
        stored_ref = self._chunk_store.put(session_id, chunk_key, data)
        cache_ref.register_chunk(session_id, chunk_key)
        # the local handle is dropped right after the chunk is registered
        del stored_ref
        left_keys.remove(chunk_key)
    except StoreFull:
        next_spill_times = 2 * spill_times
        retry = partial(_put_chunk, chunk_key, data, next_spill_times)
        spill_promise = cache_ref.spill_size(
            next_spill_times * calc_data_size(data), _promise=True)
        return spill_promise.then(retry)
def _put_chunk(data_key, data, *_):
    """
    Store ``data`` under ``data_key`` and register it with the chunk holder,
    asking the holder to spill and retrying when the store is full.

    :param data_key: key under which the data is stored and registered
    :param data: payload handed to the chunk store
    :param _: extra positional arguments are accepted and ignored
        (presumably the value passed on by the promise callback -- confirm)
    :return: ``None`` when the data was stored; otherwise the result of
        ``spill_size(...).then(...)``
    """
    def _handle_reject(*exc):
        # only ``NoDataToSpill`` is swallowed; every other error propagates
        if not issubclass(exc[0], NoDataToSpill):
            six.reraise(*exc)

    try:
        held_ref = chunk_store.put(session_id, data_key, data)
        chunk_holder_ref.register_chunk(session_id, data_key)
        # keep the reference alive for the duration of the sleep
        self.ctx.sleep(0.5)
        del held_ref
    except StoreFull:
        retry = partial(_put_chunk, data_key, data)
        spill_promise = chunk_holder_ref.spill_size(
            calc_data_size(data) * 2, _promise=True)
        return spill_promise.then(retry, _handle_reject)
async def put(self, session_id: str, data_key: str, obj: object, level: StorageLevel):
    """
    Store ``obj`` at the given storage level and record its data info.

    Quota is requested (with spill) for the calculated size of ``obj``
    before the object is written through the client for ``level``. When the
    client reports a size that differs from ``data_info.memory_size``, the
    quota for that level is updated by the difference.

    :param session_id: session the data belongs to
    :param data_key: key of the data inside the session
    :param obj: object to store
    :param level: storage level selecting the client and the quota
    :return: the data info built for the stored object
    """
    estimated_size = calc_data_size(obj)
    await self.request_quota_with_spill(level, estimated_size)

    # sleep to trigger `NoDataToSpill`
    await asyncio.sleep(0.5)

    stored = await self._clients[level].put(obj)
    info = build_data_info(stored, level, estimated_size)
    await self._data_manager_ref.put_data_info(
        session_id, data_key, info, stored)

    size_is_known = stored.size is not None
    if size_is_known and info.memory_size != stored.size:
        # reconcile the reserved quota with the size actually reported
        await self._quota_refs[level].update_quota(
            stored.size - info.memory_size)

    await self.notify_spillable_space(level)
    return info
def testGroupByWrapper(self):
    """
    Check that ``GroupByWrapper`` objects rebuilt from ``to_tuple()`` output
    match the equivalent pandas groupby objects in content, shape and
    frame/series kind.
    """
    columns = {
        'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
        'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
        'C': np.random.randn(8),
        'D': np.random.randn(8),
    }
    row_index = pd.MultiIndex.from_tuples([(i // 4, i) for i in range(8)])
    df = pd.DataFrame(columns, index=row_index)

    def verify(packed, expected, shape, **equal_kwargs):
        # Rebuild the wrapper from its tuple form and compare it with the
        # plain pandas groupby; a 2-d expected shape means a frame groupby.
        restored = GroupByWrapper.from_tuple(packed)
        assert_groupby_equal(restored, expected, **equal_kwargs)
        self.assertEqual(restored.shape, shape)
        if len(shape) == 2:
            self.assertTrue(restored.is_frame)
        else:
            self.assertFalse(restored.is_frame)
        return restored

    # group by index level
    grouped = verify(wrapped_groupby(df, level=0).to_tuple(),
                     df.groupby(level=0), (8, 4))
    self.assertGreater(sys.getsizeof(grouped),
                       sys.getsizeof(grouped.groupby_obj))
    self.assertGreater(calc_data_size(grouped),
                       sys.getsizeof(grouped.groupby_obj))

    verify(wrapped_groupby(df, level=0).C.to_tuple(),
           df.groupby(level=0).C, (8, ))

    # group by a single column
    verify(wrapped_groupby(df, 'B').to_tuple(),
           df.groupby('B'), (8, 4))
    verify(wrapped_groupby(df, 'B').C.to_tuple(truncate=True),
           df.groupby('B').C, (8, ), with_selection=True)
    verify(wrapped_groupby(df, 'B')[['C', 'D']].to_tuple(truncate=True),
           df.groupby('B')[['C', 'D']], (8, 2), with_selection=True)

    # group by several columns
    verify(wrapped_groupby(df, ['B', 'C']).to_tuple(truncate=True),
           df.groupby(['B', 'C']), (8, 4))
    verify(wrapped_groupby(df, ['B', 'C']).C.to_tuple(truncate=True),
           df.groupby(['B', 'C']).C, (8, ), with_selection=True)
    verify(wrapped_groupby(df, ['B', 'C'])[['A', 'D']].to_tuple(truncate=True),
           df.groupby(['B', 'C'])[['A', 'D']], (8, 2), with_selection=True)
    verify(wrapped_groupby(df, ['B', 'C'])[['C', 'D']].to_tuple(truncate=True),
           df.groupby(['B', 'C'])[['C', 'D']], (8, 2), with_selection=True)

    # group by a function of the index
    verify(
        wrapped_groupby(
            df, lambda x: x[-1] % 2).to_tuple(pickle_function=True),
        df.groupby(lambda x: x[-1] % 2), (8, 4), with_selection=True)
    verify(
        wrapped_groupby(
            df, lambda x: x[-1] % 2).C.to_tuple(pickle_function=True),
        df.groupby(lambda x: x[-1] % 2).C, (8, ), with_selection=True)
    verify(
        wrapped_groupby(
            df, lambda x: x[-1] % 2)[['C', 'D']].to_tuple(pickle_function=True),
        df.groupby(lambda x: x[-1] % 2)[['C', 'D']], (8, 2),
        with_selection=True)

    # series groupby by a function of the index
    verify(wrapped_groupby(df.B, lambda x: x[-1] % 2).to_tuple(),
           df.B.groupby(lambda x: x[-1] % 2), (8, ), with_selection=True)
def _patch_start_tracker_estimator(ctx, op: StartTracker):
    """
    Run the operand's size estimation and assert that both estimated values
    stored in ``ctx`` for its first output equal ``calc_data_size`` of that
    output.

    :param ctx: mapping that ``estimate_size`` fills, read back by output key
    :param op: operand whose ``estimate_size`` is exercised
    """
    op.estimate_size(ctx, op)
    first_output = op.outputs[0]
    sizes = ctx[first_output.key]
    # chained comparison: the two estimates must agree with each other and
    # with the calculated size of the output
    assert sizes[0] == sizes[1] == calc_data_size(first_output)
def test_groupby_wrapper():
    """
    Check that wrapped groupby objects survive a pickle round-trip and still
    match the equivalent pandas groupby objects in content, shape and
    frame/series kind.
    """
    columns = {
        'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
        'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
        'C': np.random.randn(8),
        'D': np.random.randn(8),
    }
    row_index = pd.MultiIndex.from_tuples([(i // 4, i) for i in range(8)])
    df = pd.DataFrame(columns, index=row_index)

    def roundtrip(obj):
        # serialize and immediately deserialize with pickle
        return pickle.loads(pickle.dumps(obj))

    def verify(wrapper, expected, shape, **equal_kwargs):
        # Round-trip the wrapper and compare it with the plain pandas
        # groupby; a 2-d expected shape means a frame groupby.
        restored = roundtrip(wrapper)
        assert_groupby_equal(restored, expected, **equal_kwargs)
        assert restored.shape == shape
        assert restored.is_frame is (len(shape) == 2)
        return restored

    # group by index level
    grouped = verify(wrapped_groupby(df, level=0), df.groupby(level=0), (8, 4))
    assert sys.getsizeof(grouped) > sys.getsizeof(grouped.groupby_obj)
    assert calc_data_size(grouped) > sys.getsizeof(grouped.groupby_obj)

    verify(wrapped_groupby(df, level=0).C, df.groupby(level=0).C, (8, ))

    # group by a single column
    verify(wrapped_groupby(df, 'B'), df.groupby('B'), (8, 4))
    verify(wrapped_groupby(df, 'B').C, df.groupby('B').C, (8, ),
           with_selection=True)
    verify(wrapped_groupby(df, 'B')[['C', 'D']],
           df.groupby('B')[['C', 'D']], (8, 2), with_selection=True)

    # group by several columns
    verify(wrapped_groupby(df, ['B', 'C']), df.groupby(['B', 'C']), (8, 4))
    verify(wrapped_groupby(df, ['B', 'C']).C, df.groupby(['B', 'C']).C,
           (8, ), with_selection=True)
    verify(wrapped_groupby(df, ['B', 'C'])[['A', 'D']],
           df.groupby(['B', 'C'])[['A', 'D']], (8, 2), with_selection=True)
    verify(wrapped_groupby(df, ['B', 'C'])[['C', 'D']],
           df.groupby(['B', 'C'])[['C', 'D']], (8, 2), with_selection=True)

    # group by a function of the index
    verify(wrapped_groupby(df, lambda x: x[-1] % 2),
           df.groupby(lambda x: x[-1] % 2), (8, 4), with_selection=True)
    verify(wrapped_groupby(df, lambda x: x[-1] % 2).C,
           df.groupby(lambda x: x[-1] % 2).C, (8, ), with_selection=True)
    verify(wrapped_groupby(df, lambda x: x[-1] % 2)[['C', 'D']],
           df.groupby(lambda x: x[-1] % 2)[['C', 'D']], (8, 2),
           with_selection=True)

    # series groupby by a function of the index
    verify(wrapped_groupby(df.B, lambda x: x[-1] % 2),
           df.B.groupby(lambda x: x[-1] % 2), (8, ), with_selection=True)