예제 #1
0
 def _put_chunk(chunk_key, data, spill_times=1):
     """Put ``data`` into the chunk store, spilling and retrying when it is full.

     On success the chunk is registered with ``cache_ref`` and its key is
     removed from ``left_keys``; nothing is returned.  When the store raises
     ``StoreFull``, a spill of ``2 * spill_times * calc_data_size(data)`` is
     requested and the returned promise retries this function with
     ``spill_times`` doubled.

     :param chunk_key: key under which the chunk is stored
     :param data: object to store
     :param spill_times: multiplier applied to the requested spill size
     :return: ``None`` on success, otherwise the retry promise
     """
     try:
         stored = self._chunk_store.put(session_id, chunk_key, data)
         cache_ref.register_chunk(session_id, chunk_key)
         # The handle returned by ``put`` is dropped only after registration.
         del stored
         left_keys.remove(chunk_key)
     except StoreFull:
         next_spill_times = 2 * spill_times
         spill_promise = cache_ref.spill_size(
             next_spill_times * calc_data_size(data), _promise=True)
         retry = partial(_put_chunk, chunk_key, data, next_spill_times)
         return spill_promise.then(retry)
예제 #2
0
        def _put_chunk(data_key, data, *_):
            """Store ``data`` under ``data_key``, spilling and retrying on ``StoreFull``.

            Extra positional arguments are accepted and ignored.  On
            ``StoreFull`` a spill of twice the data size is requested and the
            returned promise retries this function; a rejection whose
            exception class derives from ``NoDataToSpill`` is swallowed, any
            other rejection is re-raised.
            """
            def _handle_reject(*exc):
                # ``exc[0]`` is the exception class; only NoDataToSpill is
                # tolerated, everything else propagates unchanged.
                if not issubclass(exc[0], NoDataToSpill):
                    six.reraise(*exc)

            try:
                held_ref = chunk_store.put(session_id, data_key, data)
                chunk_holder_ref.register_chunk(session_id, data_key)
                # NOTE(review): the pause happens while the put handle is still
                # held -- presumably to keep the data unspillable for a while;
                # confirm against the surrounding test.
                self.ctx.sleep(0.5)
                del held_ref
            except StoreFull:
                spill_promise = chunk_holder_ref.spill_size(
                    calc_data_size(data) * 2, _promise=True)
                retry = partial(_put_chunk, data_key, data)
                return spill_promise.then(retry, _handle_reject)
예제 #3
0
파일: test_spill.py 프로젝트: qinxuye/mars
 async def put(self, session_id: str, data_key: str, obj: object,
               level: StorageLevel):
     """Store ``obj`` at ``level`` and record its data info.

     Quota for the computed size is requested (with spill) first, then the
     coroutine pauses for half a second before handing the object to the
     storage client of ``level``.  The resulting data info is recorded in the
     data manager, the quota is adjusted when the client reports a size that
     differs from the recorded memory size, and spillable space is notified.

     :return: the data info built for the stored object
     """
     estimated_size = calc_data_size(obj)
     await self.request_quota_with_spill(level, estimated_size)

     # sleep to trigger `NoDataToSpill`
     await asyncio.sleep(0.5)

     stored_info = await self._clients[level].put(obj)
     info = build_data_info(stored_info, level, estimated_size)
     await self._data_manager_ref.put_data_info(
         session_id, data_key, info, stored_info)

     # Reconcile the quota when the client reports a size that differs from
     # the memory size recorded in the data info.
     reported_size = stored_info.size
     if reported_size is not None and info.memory_size != reported_size:
         quota_ref = self._quota_refs[level]
         await quota_ref.update_quota(reported_size - info.memory_size)

     await self.notify_spillable_space(level)
     return info
예제 #4
0
파일: test_lib.py 프로젝트: tomzhang/mars-1
    def testGroupByWrapper(self):
        """Round-trip wrapped groupby objects through ``to_tuple``/``from_tuple``.

        Each case rebuilds a ``GroupByWrapper`` from its tuple form, compares
        it with the equivalent plain pandas groupby, and checks the reported
        ``shape`` and ``is_frame`` flag.
        """
        columns = {
            'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
            'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
            'C': np.random.randn(8),
            'D': np.random.randn(8),
        }
        row_index = pd.MultiIndex.from_tuples(
            [(i // 4, i) for i in range(8)])
        df = pd.DataFrame(columns, index=row_index)

        def check(packed, expected, shape, is_frame, **equal_kw):
            # Restore the wrapper from its tuple form and verify it against
            # the reference pandas groupby.
            restored = GroupByWrapper.from_tuple(packed)
            assert_groupby_equal(restored, expected, **equal_kw)
            self.assertEqual(restored.shape, shape)
            if is_frame:
                self.assertTrue(restored.is_frame)
            else:
                self.assertFalse(restored.is_frame)
            return restored

        # Group by index level.
        grouped = check(wrapped_groupby(df, level=0).to_tuple(),
                        df.groupby(level=0), (8, 4), True)
        # The wrapper must report a larger size than the bare groupby object.
        self.assertGreater(sys.getsizeof(grouped),
                           sys.getsizeof(grouped.groupby_obj))
        self.assertGreater(calc_data_size(grouped),
                           sys.getsizeof(grouped.groupby_obj))

        check(wrapped_groupby(df, level=0).C.to_tuple(),
              df.groupby(level=0).C, (8, ), False)

        # Group by a single column.
        check(wrapped_groupby(df, 'B').to_tuple(),
              df.groupby('B'), (8, 4), True)
        check(wrapped_groupby(df, 'B').C.to_tuple(truncate=True),
              df.groupby('B').C, (8, ), False,
              with_selection=True)
        check(wrapped_groupby(df, 'B')[['C', 'D']].to_tuple(truncate=True),
              df.groupby('B')[['C', 'D']], (8, 2), True,
              with_selection=True)

        # Group by several columns.
        check(wrapped_groupby(df, ['B', 'C']).to_tuple(truncate=True),
              df.groupby(['B', 'C']), (8, 4), True)
        check(wrapped_groupby(df, ['B', 'C']).C.to_tuple(truncate=True),
              df.groupby(['B', 'C']).C, (8, ), False,
              with_selection=True)
        check(
            wrapped_groupby(df, ['B', 'C'])[['A', 'D']].to_tuple(
                truncate=True),
            df.groupby(['B', 'C'])[['A', 'D']], (8, 2), True,
            with_selection=True)
        check(
            wrapped_groupby(df, ['B', 'C'])[['C', 'D']].to_tuple(
                truncate=True),
            df.groupby(['B', 'C'])[['C', 'D']], (8, 2), True,
            with_selection=True)

        # Group by a function, which has to be pickled along with the tuple.
        check(
            wrapped_groupby(df, lambda x: x[-1] % 2).to_tuple(
                pickle_function=True),
            df.groupby(lambda x: x[-1] % 2), (8, 4), True,
            with_selection=True)
        check(
            wrapped_groupby(df, lambda x: x[-1] % 2).C.to_tuple(
                pickle_function=True),
            df.groupby(lambda x: x[-1] % 2).C, (8, ), False,
            with_selection=True)
        check(
            wrapped_groupby(df, lambda x: x[-1] % 2)[['C', 'D']].to_tuple(
                pickle_function=True),
            df.groupby(lambda x: x[-1] % 2)[['C', 'D']], (8, 2), True,
            with_selection=True)

        # Group a single column (series) by a function.
        check(wrapped_groupby(df.B, lambda x: x[-1] % 2).to_tuple(),
              df.B.groupby(lambda x: x[-1] % 2), (8, ), False,
              with_selection=True)
예제 #5
0
 def _patch_start_tracker_estimator(ctx, op: StartTracker):
     """Run ``op.estimate_size`` and assert the estimate for its first output.

     Both entries of the estimate stored in ``ctx`` under the first output's
     key must equal ``calc_data_size`` of that output.
     """
     op.estimate_size(ctx, op)
     first_output = op.outputs[0]
     estimate = ctx[first_output.key]
     assert estimate[0] == estimate[1] == calc_data_size(first_output)
예제 #6
0
파일: test_lib.py 프로젝트: qinxuye/mars
def test_groupby_wrapper():
    """Round-trip wrapped groupby objects through pickle and verify them.

    Each case pickles and unpickles a wrapped groupby, compares the result
    with the equivalent plain pandas groupby, and checks the reported
    ``shape`` and ``is_frame`` flag.
    """
    columns = {
        'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
        'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
        'C': np.random.randn(8),
        'D': np.random.randn(8),
    }
    row_index = pd.MultiIndex.from_tuples([(i // 4, i) for i in range(8)])
    df = pd.DataFrame(columns, index=row_index)

    def check(wrapped, expected, shape, is_frame, **equal_kw):
        # Pickle round trip of data created in this test, followed by the
        # common comparisons against the reference pandas groupby.
        restored = pickle.loads(pickle.dumps(wrapped))
        assert_groupby_equal(restored, expected, **equal_kw)
        assert restored.shape == shape
        assert restored.is_frame is is_frame
        return restored

    # Group by index level.
    grouped = check(wrapped_groupby(df, level=0),
                    df.groupby(level=0), (8, 4), True)
    # The wrapper must report a larger size than the bare groupby object.
    assert sys.getsizeof(grouped) > sys.getsizeof(grouped.groupby_obj)
    assert calc_data_size(grouped) > sys.getsizeof(grouped.groupby_obj)

    check(wrapped_groupby(df, level=0).C,
          df.groupby(level=0).C, (8, ), False)

    # Group by a single column.
    check(wrapped_groupby(df, 'B'),
          df.groupby('B'), (8, 4), True)
    check(wrapped_groupby(df, 'B').C,
          df.groupby('B').C, (8, ), False,
          with_selection=True)
    check(wrapped_groupby(df, 'B')[['C', 'D']],
          df.groupby('B')[['C', 'D']], (8, 2), True,
          with_selection=True)

    # Group by several columns.
    check(wrapped_groupby(df, ['B', 'C']),
          df.groupby(['B', 'C']), (8, 4), True)
    check(wrapped_groupby(df, ['B', 'C']).C,
          df.groupby(['B', 'C']).C, (8, ), False,
          with_selection=True)
    check(wrapped_groupby(df, ['B', 'C'])[['A', 'D']],
          df.groupby(['B', 'C'])[['A', 'D']], (8, 2), True,
          with_selection=True)
    check(wrapped_groupby(df, ['B', 'C'])[['C', 'D']],
          df.groupby(['B', 'C'])[['C', 'D']], (8, 2), True,
          with_selection=True)

    # Group by a function.
    check(wrapped_groupby(df, lambda x: x[-1] % 2),
          df.groupby(lambda x: x[-1] % 2), (8, 4), True,
          with_selection=True)
    check(wrapped_groupby(df, lambda x: x[-1] % 2).C,
          df.groupby(lambda x: x[-1] % 2).C, (8, ), False,
          with_selection=True)
    check(wrapped_groupby(df, lambda x: x[-1] % 2)[['C', 'D']],
          df.groupby(lambda x: x[-1] % 2)[['C', 'D']], (8, 2), True,
          with_selection=True)

    # Group a single column (series) by a function.
    check(wrapped_groupby(df.B, lambda x: x[-1] % 2),
          df.B.groupby(lambda x: x[-1] % 2), (8, ), False,
          with_selection=True)