def assert_arrow_memory_increases():
    import gc

    gc.collect()
    previous_allocated_memory = pa.total_allocated_bytes()
    yield
    assert pa.total_allocated_bytes() - previous_allocated_memory > 0, "Arrow memory didn't increase."
def test_export_import_type():
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    old_allocated = pa.total_allocated_bytes()

    typ = pa.list_(pa.int32())
    typ._export_to_c(ptr_schema)
    assert pa.total_allocated_bytes() > old_allocated
    # Delete and recreate C++ object from exported pointer
    del typ
    assert pa.total_allocated_bytes() > old_allocated
    typ_new = pa.DataType._import_from_c(ptr_schema)
    assert typ_new == pa.list_(pa.int32())
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        pa.DataType._import_from_c(ptr_schema)

    # Invalid format string
    pa.int32()._export_to_c(ptr_schema)
    bad_format = ffi.new("char[]", b"zzz")
    c_schema.format = bad_format
    with pytest.raises(ValueError, match="Invalid or unsupported format string"):
        pa.DataType._import_from_c(ptr_schema)
    # Now released
    with assert_schema_released:
        pa.DataType._import_from_c(ptr_schema)
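# The C data interface tests in this collection rely on module-level helpers
# from pyarrow's test suite (ffi comes from pyarrow.cffi) that are not shown
# here. A minimal sketch of plausible definitions follows; the exact match
# strings are an assumption, not quoted from the original module.
import pytest

assert_schema_released = pytest.raises(
    ValueError, match="Cannot import released ArrowSchema")

assert_array_released = pytest.raises(
    ValueError, match="Cannot import released ArrowArray")

assert_stream_released = pytest.raises(
    ValueError, match="Cannot import released ArrowArrayStream")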
def assert_arrow_memory_doesnt_increase():
    import gc

    gc.collect()
    previous_allocated_memory = pa.total_allocated_bytes()
    yield
    assert pa.total_allocated_bytes() - previous_allocated_memory <= 0, "Arrow memory wasn't expected to increase."
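# A minimal usage sketch for the two yield-based assertions above
# (assert_arrow_memory_increases / assert_arrow_memory_doesnt_increase),
# assuming they are wrapped with contextlib.contextmanager as yield-based
# helpers usually are; the test body is illustrative, not from the original suite.
import pyarrow as pa


def test_in_memory_table_allocates_arrow_memory():
    with assert_arrow_memory_increases():
        # Converting a Python list allocates buffers from Arrow's default pool,
        # and `table` stays alive past the end of the block, so the delta is > 0.
        table = pa.table({"x": list(range(100_000))})  # noqa: F841

    del table

    with assert_arrow_memory_doesnt_increase():
        # Pure Python work does not touch the Arrow memory pool.
        _ = sum(range(100_000))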
def check_export_import_schema(schema_factory):
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    old_allocated = pa.total_allocated_bytes()

    schema_factory()._export_to_c(ptr_schema)
    assert pa.total_allocated_bytes() > old_allocated
    # Delete and recreate C++ object from exported pointer
    schema_new = pa.Schema._import_from_c(ptr_schema)
    assert schema_new == schema_factory()
    assert pa.total_allocated_bytes() == old_allocated
    del schema_new
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        pa.Schema._import_from_c(ptr_schema)

    # Not a struct type
    pa.int32()._export_to_c(ptr_schema)
    with pytest.raises(ValueError, match="ArrowSchema describes non-struct type"):
        pa.Schema._import_from_c(ptr_schema)
    # Now released
    with assert_schema_released:
        pa.Schema._import_from_c(ptr_schema)
def no_pyarrow_leak():
    # No leak of C++ memory
    old_allocation = pa.total_allocated_bytes()
    try:
        yield
    finally:
        assert pa.total_allocated_bytes() == old_allocation
def test_cast_kernel_on_extension_arrays():
    # test array casting
    storage = pa.array([1, 2, 3, 4], pa.int64())
    arr = pa.ExtensionArray.from_storage(IntegerType(), storage)

    # test that no allocation happens during identity cast
    allocated_before_cast = pa.total_allocated_bytes()
    casted = arr.cast(pa.int64())
    assert pa.total_allocated_bytes() == allocated_before_cast

    cases = [
        (pa.int64(), pa.Int64Array),
        (pa.int32(), pa.Int32Array),
        (pa.int16(), pa.Int16Array),
        (pa.uint64(), pa.UInt64Array),
        (pa.uint32(), pa.UInt32Array),
        (pa.uint16(), pa.UInt16Array),
    ]
    for typ, klass in cases:
        casted = arr.cast(typ)
        assert casted.type == typ
        assert isinstance(casted, klass)

    # test chunked array casting
    arr = pa.chunked_array([arr, arr])
    casted = arr.cast(pa.int16())
    assert casted.type == pa.int16()
    assert isinstance(casted, pa.ChunkedArray)
def test_pandas_self_destruct(self):
    import pyarrow as pa

    rows = 2 ** 10
    cols = 4
    expected_bytes = rows * cols * 8
    df = self.spark.range(0, rows).select(*[rand() for _ in range(cols)])
    # Test the self_destruct behavior by testing _collect_as_arrow directly
    allocation_before = pa.total_allocated_bytes()
    batches = df._collect_as_arrow(split_batches=True)
    table = pa.Table.from_batches(batches)
    del batches
    pdf_split = table.to_pandas(self_destruct=True, split_blocks=True, use_threads=False)
    allocation_after = pa.total_allocated_bytes()
    difference = allocation_after - allocation_before
    # Should be around 1x the data size (table should not hold on to any memory)
    self.assertGreaterEqual(difference, 0.9 * expected_bytes)
    self.assertLessEqual(difference, 1.1 * expected_bytes)
    with self.sql_conf({"spark.sql.execution.arrow.pyspark.selfDestruct.enabled": False}):
        no_self_destruct_pdf = df.toPandas()
        # Note while memory usage is 2x data size here (both table and pdf hold on to
        # memory), in this case Arrow still only tracks 1x worth of memory (since the
        # batches are not allocated by Arrow in this case), so we can't make any
        # assertions here
    with self.sql_conf({"spark.sql.execution.arrow.pyspark.selfDestruct.enabled": True}):
        self_destruct_pdf = df.toPandas()
    assert_frame_equal(pdf_split, no_self_destruct_pdf)
    assert_frame_equal(pdf_split, self_destruct_pdf)
def tearDown(self):
    self.bridge.close()
    gc.collect()
    diff_python = pa.total_allocated_bytes() - self.old_allocated_python
    self.assertEqual(
        pa.total_allocated_bytes(), self.old_allocated_python,
        f"PyArrow memory was not adequately released: {diff_python} bytes lost"
    )
def assert_pyarrow_memory_released(self):
    self.run_gc()
    old_allocated = pa.total_allocated_bytes()
    yield
    self.run_gc()
    diff = pa.total_allocated_bytes() - old_allocated
    self.assertEqual(
        pa.total_allocated_bytes(), old_allocated,
        f"PyArrow memory was not adequately released: {diff} bytes lost")
def test_read_table(in_memory, dataset, arrow_file):
    filename = arrow_file
    previous_allocated_memory = pa.total_allocated_bytes()
    table = ArrowReader.read_table(filename, in_memory=in_memory)
    increased_allocated_memory = (pa.total_allocated_bytes() - previous_allocated_memory) > 0
    assert table.shape == dataset.data.shape
    assert set(table.column_names) == set(dataset.data.column_names)
    assert dict(table.to_pydict()) == dict(dataset.data.to_pydict())  # to_pydict returns OrderedDict
    assert increased_allocated_memory == in_memory
def test_garbage_collection(self):
    import gc

    # Force the cyclic garbage collector to run
    gc.collect()

    bytes_before = pyarrow.total_allocated_bytes()
    pyarrow.from_pylist([1, None, 3, None])
    gc.collect()
    assert pyarrow.total_allocated_bytes() == bytes_before
def test_garbage_collection():
    import gc

    # Force the cyclic garbage collector to run
    gc.collect()

    bytes_before = pa.total_allocated_bytes()
    pa.array([1, None, 3, None])
    gc.collect()
    assert pa.total_allocated_bytes() == bytes_before
def test_read_files(in_memory, dataset, arrow_file):
    filename = arrow_file
    reader = ArrowReader("", None)
    previous_allocated_memory = pa.total_allocated_bytes()
    dataset_kwargs = reader.read_files([{"filename": filename}], in_memory=in_memory)
    increased_allocated_memory = (pa.total_allocated_bytes() - previous_allocated_memory) > 0
    assert dataset_kwargs.keys() == set(["arrow_table", "data_files", "info", "split"])
    table = dataset_kwargs["arrow_table"]
    assert table.shape == dataset.data.shape
    assert set(table.column_names) == set(dataset.data.column_names)
    assert dict(table.to_pydict()) == dict(dataset.data.to_pydict())  # to_pydict returns OrderedDict
    assert increased_allocated_memory == in_memory
def test_string_python(self):
    """
    Python -> Rust -> Python
    """
    old_allocated = pyarrow.total_allocated_bytes()
    a = pyarrow.array(["a", None, "ccc"])
    b = arrow_pyarrow_integration_testing.substring(a, 1)
    self.assertEqual(b, pyarrow.array(["", None, "cc"]))
    del a
    del b
    # No leak of C++ memory
    self.assertEqual(old_allocated, pyarrow.total_allocated_bytes())
def test_primitive_python(self):
    """
    Python -> Rust -> Python
    """
    old_allocated = pyarrow.total_allocated_bytes()
    a = pyarrow.array([1, 2, 3])
    b = arrow_pyarrow_integration_testing.double(a)
    self.assertEqual(b, pyarrow.array([2, 4, 6]))
    del a
    del b
    # No leak of C++ memory
    self.assertEqual(old_allocated, pyarrow.total_allocated_bytes())
def test_different_memory_pool():
    gc.collect()
    bytes_before_default = pa.total_allocated_bytes()
    bytes_before_jemalloc = pa.jemalloc_memory_pool().bytes_allocated()

    # it works
    array = pa.array([1, None, 3, None],  # noqa
                     memory_pool=pa.jemalloc_memory_pool())
    gc.collect()
    assert pa.total_allocated_bytes() == bytes_before_default
    assert (pa.jemalloc_memory_pool().bytes_allocated() >
            bytes_before_jemalloc)
def test_import_primitive(self):
    """
    Python -> Rust
    """
    old_allocated = pyarrow.total_allocated_bytes()
    a = pyarrow.array([2, None, 6])
    is_correct = arrow_pyarrow_integration_testing.import_primitive(a)
    self.assertTrue(is_correct)
    # No leak of C++ memory
    del a
    self.assertEqual(old_allocated, pyarrow.total_allocated_bytes())
def test_export_primitive(self):
    """
    Python -> Rust
    """
    old_allocated = pyarrow.total_allocated_bytes()
    expected = pyarrow.array([2, None, 6])
    result = arrow_pyarrow_integration_testing.export_primitive()
    self.assertEqual(expected, result)
    # No leak of C++ memory
    del expected
    self.assertEqual(old_allocated, pyarrow.total_allocated_bytes())
def test_different_memory_pool():
    gc.collect()
    bytes_before_default = pa.total_allocated_bytes()
    bytes_before_jemalloc = pa.jemalloc_memory_pool().bytes_allocated()

    # it works
    array = pa.array(
        [1, None, 3, None],  # noqa
        memory_pool=pa.jemalloc_memory_pool())
    gc.collect()
    assert pa.total_allocated_bytes() == bytes_before_default
    assert (pa.jemalloc_memory_pool().bytes_allocated() >
            bytes_before_jemalloc)
def test_time32_python(self):
    """
    Python -> Rust -> Python
    """
    old_allocated = pyarrow.total_allocated_bytes()
    a = pyarrow.array([None, 1, 2], pyarrow.time32('s'))
    b = arrow_pyarrow_integration_testing.concatenate(a)
    expected = pyarrow.array([None, 1, 2] + [None, 1, 2], pyarrow.time32('s'))
    self.assertEqual(b, expected)
    del a
    del b
    del expected
    # No leak of C++ memory
    self.assertEqual(old_allocated, pyarrow.total_allocated_bytes())
def test_primitive_rust(self):
    """
    Rust -> Python -> Rust
    """
    old_allocated = pyarrow.total_allocated_bytes()

    def double(array):
        array = array.to_pylist()
        return pyarrow.array([x * 2 if x is not None else None for x in array])

    is_correct = arrow_pyarrow_integration_testing.double_py(double)
    self.assertTrue(is_correct)
    # No leak of C++ memory
    self.assertEqual(old_allocated, pyarrow.total_allocated_bytes())
def assert_pyarrow_memory_released(self):
    self.run_gc()
    old_allocated = pa.total_allocated_bytes()
    old_go_allocated = cgotest.totalAllocated()
    yield
    self.run_gc()
    diff = pa.total_allocated_bytes() - old_allocated
    godiff = cgotest.totalAllocated() - old_go_allocated
    self.assertEqual(
        pa.total_allocated_bytes(), old_allocated,
        f"PyArrow memory was not adequately released: {diff} bytes lost")
    self.assertEqual(
        cgotest.totalAllocated(), old_go_allocated,
        f"Go memory was not properly released: {godiff} bytes lost")
def test_generator_based_builder_as_dataset(in_memory, tmp_path):
    cache_dir = tmp_path / "data"
    cache_dir.mkdir()
    cache_dir = str(cache_dir)
    dummy_builder = DummyGeneratorBasedBuilder(cache_dir=cache_dir, name="dummy")
    dummy_builder.download_and_prepare(try_from_hf_gcs=False, download_mode=FORCE_REDOWNLOAD)
    previous_allocated_memory = pa.total_allocated_bytes()
    dataset = dummy_builder.as_dataset("train", in_memory=in_memory)
    increased_allocated_memory = (pa.total_allocated_bytes() - previous_allocated_memory) > 0
    assert dataset.data.to_pydict() == {"text": ["foo"] * 100}
    assert increased_allocated_memory == in_memory
def test_export_import_batch_reader(reader_factory):
    c_stream = ffi.new("struct ArrowArrayStream*")
    ptr_stream = int(ffi.cast("uintptr_t", c_stream))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    old_allocated = pa.total_allocated_bytes()

    _export_import_batch_reader(ptr_stream, reader_factory)

    assert pa.total_allocated_bytes() == old_allocated

    # Now released
    with assert_stream_released:
        pa.RecordBatchReader._import_from_c(ptr_stream)
def test_list_array(self):
    """
    Python -> Rust -> Python
    """
    old_allocated = pyarrow.total_allocated_bytes()
    a = pyarrow.array([[], None, [1, 2], [4, 5, 6]], pyarrow.list_(pyarrow.int64()))
    b = arrow_pyarrow_integration_testing.round_trip(a)

    b.validate(full=True)
    assert a.to_pylist() == b.to_pylist()
    assert a.type == b.type
    del a
    del b
    # No leak of C++ memory
    self.assertEqual(old_allocated, pyarrow.total_allocated_bytes())
def rebook(self):
    """Rebook histograms for timers or profiles.

    Typically called after random sampling of a data chunk.
    """
    self.__logger.info("Rebook")

    self.gate.hbook.rebook()  # Resets all histograms!

    self.__logger.info("artemis: allocated before reset %i", pa.total_allocated_bytes())
    self.datahandler.reset()
    self.__logger.info("artemis: allocated after reset %i", pa.total_allocated_bytes())

    # Reset all meta data needed for processing all job info
    self.reset_job_summary()
def test_load_dataset_local(dataset_loading_script_dir, data_dir, keep_in_memory, caplog):
    previous_allocated_memory = pa.total_allocated_bytes()
    dataset = load_dataset(dataset_loading_script_dir, data_dir=data_dir, keep_in_memory=keep_in_memory)
    increased_allocated_memory = (pa.total_allocated_bytes() - previous_allocated_memory) > 0
    assert len(dataset) == 2
    assert increased_allocated_memory == keep_in_memory
    for offline_simulation_mode in list(OfflineSimulationMode):
        with offline(offline_simulation_mode):
            caplog.clear()
            # Load dataset from cache
            dataset = datasets.load_dataset(DATASET_LOADING_SCRIPT_NAME, data_dir=data_dir)
            assert len(dataset) == 2
            assert "Using the latest cached version of the module" in caplog.text
    with pytest.raises(FileNotFoundError) as exc_info:
        datasets.load_dataset("_dummy")
    assert "at " + os.path.join("_dummy", "_dummy.py") in str(exc_info.value)
def test_default_memory_pool():
    gc.collect()
    bytes_before_default = pa.total_allocated_bytes()
    bytes_before_jemalloc = pa.jemalloc_memory_pool().bytes_allocated()

    old_memory_pool = pa.default_memory_pool()
    pa.set_memory_pool(pa.jemalloc_memory_pool())

    array = pa.array([1, None, 3, None])  # noqa

    pa.set_memory_pool(old_memory_pool)
    gc.collect()

    assert pa.total_allocated_bytes() == bytes_before_default
    assert (pa.jemalloc_memory_pool().bytes_allocated() >
            bytes_before_jemalloc)
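# A minimal sketch, not taken from the test files above: jemalloc is an optional
# component of Arrow builds, so the jemalloc pool tests are normally guarded by
# a skip condition along these lines. The HAVE_JEMALLOC / jemalloc_only names
# are illustrative, not from the original suite.
import pytest
import pyarrow as pa

try:
    pa.jemalloc_memory_pool()
    HAVE_JEMALLOC = True
except NotImplementedError:
    HAVE_JEMALLOC = False

jemalloc_only = pytest.mark.skipif(
    not HAVE_JEMALLOC, reason="jemalloc memory pool not available in this Arrow build")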
def test_batch_lifetime(self):
    gc.collect()
    old_allocated = pa.total_allocated_bytes()

    # Memory occupation should not grow with CSV file size
    def check_one_batch(reader, expected):
        batch = reader.read_next_batch()
        assert batch.to_pydict() == expected

    rows = b"10,11\n12,13\n14,15\n16,17\n"
    read_options = ReadOptions()
    read_options.column_names = ['a', 'b']
    read_options.block_size = 6
    reader = self.open_bytes(rows, read_options=read_options)
    check_one_batch(reader, {'a': [10], 'b': [11]})
    allocated_after_first_batch = pa.total_allocated_bytes()
    check_one_batch(reader, {'a': [12], 'b': [13]})
    assert pa.total_allocated_bytes() == allocated_after_first_batch
    check_one_batch(reader, {'a': [14], 'b': [15]})
    assert pa.total_allocated_bytes() == allocated_after_first_batch
    check_one_batch(reader, {'a': [16], 'b': [17]})
    assert pa.total_allocated_bytes() == allocated_after_first_batch
    with pytest.raises(StopIteration):
        reader.read_next_batch()
    assert pa.total_allocated_bytes() == old_allocated
    reader = None
    assert pa.total_allocated_bytes() == old_allocated
def test_export_import_field():
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    old_allocated = pa.total_allocated_bytes()

    field = pa.field("test", pa.list_(pa.int32()), nullable=True)
    field._export_to_c(ptr_schema)
    assert pa.total_allocated_bytes() > old_allocated
    # Delete and recreate C++ object from exported pointer
    del field
    assert pa.total_allocated_bytes() > old_allocated

    field_new = pa.Field._import_from_c(ptr_schema)
    assert field_new == pa.field("test", pa.list_(pa.int32()), nullable=True)
    assert pa.total_allocated_bytes() == old_allocated

    # Now released
    with assert_schema_released:
        pa.Field._import_from_c(ptr_schema)
def read(self):
    start_time = arrow.utcnow()
    if self.logger:
        self.logger.debug('Reading %s', self.filename)
    table = pq.read_table(self.filename)
    read_time = (arrow.utcnow() - start_time).total_seconds()
    n_bytes = pa.total_allocated_bytes()
    if self.logger:
        self.logger.debug('Reading %s sequences (%sB) took %.1f s (%sB/s).',
                          engr_notation(len(table), powers_of_2=False),
                          engr_notation(n_bytes, digits=2),
                          read_time,
                          engr_notation(n_bytes / read_time, digits=2))
    return table
def test_total_bytes_allocated():
    assert pyarrow.total_allocated_bytes() == 0
def test_default_allocated_bytes():
    pool = pa.default_memory_pool()
    with allocate_bytes(pool, 1024):
        check_allocated_bytes(pool)
        assert pool.bytes_allocated() == pa.total_allocated_bytes()
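# The allocate_bytes and check_allocated_bytes helpers used by the test above
# come from pyarrow's own test module and are not shown here. A hedged sketch
# of what they plausibly do, built only on public APIs (pa.allocate_buffer);
# the names and exact behavior are assumptions, not the original definitions.
import contextlib

import pyarrow as pa


@contextlib.contextmanager
def allocate_bytes(pool, nbytes):
    """Temporarily allocate nbytes from the given pool, releasing them on exit."""
    buf = pa.allocate_buffer(nbytes, memory_pool=pool)
    try:
        yield
    finally:
        buf = None  # drop the only reference so the pool's count falls back


def check_allocated_bytes(pool):
    """While allocate_bytes() is active, the pool should report a live allocation."""
    assert pool.bytes_allocated() > 0
    assert pool.max_memory() >= pool.bytes_allocated()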