def test_buffer_slicing():
    data = b'some data!'
    buf = pa.py_buffer(data)

    sliced = buf.slice(2)
    expected = pa.py_buffer(b'me data!')
    assert sliced.equals(expected)

    sliced2 = buf.slice(2, 4)
    expected2 = pa.py_buffer(b'me d')
    assert sliced2.equals(expected2)

    # 0 offset
    assert buf.slice(0).equals(buf)

    # Slice past end of buffer
    assert len(buf.slice(len(buf))) == 0

    with pytest.raises(IndexError):
        buf.slice(-1)

    # Test slice notation
    assert buf[2:].equals(buf.slice(2))
    assert buf[2:5].equals(buf.slice(2, 3))
    assert buf[-5:].equals(buf.slice(len(buf) - 5))

    with pytest.raises(IndexError):
        buf[::-1]
    with pytest.raises(IndexError):
        buf[::2]

    n = len(buf)
    for start in range(-n * 2, n * 2):
        for stop in range(-n * 2, n * 2):
            assert (buf[start:stop].to_pybytes() ==
                    buf.to_pybytes()[start:stop])
def test_buffer_equals():
    # Buffer.equals() returns true iff the buffers have the same contents
    def eq(a, b):
        assert a.equals(b)
        assert a == b
        assert not (a != b)

    def ne(a, b):
        assert not a.equals(b)
        assert not (a == b)
        assert a != b

    b1 = b'some data!'
    b2 = bytearray(b1)
    b3 = bytearray(b1)
    b3[0] = 42
    buf1 = pa.py_buffer(b1)
    buf2 = pa.py_buffer(b2)
    buf3 = pa.py_buffer(b2)
    buf4 = pa.py_buffer(b3)
    buf5 = pa.py_buffer(np.frombuffer(b2, dtype=np.int16))
    eq(buf1, buf1)
    eq(buf1, buf2)
    eq(buf2, buf3)
    ne(buf2, buf4)
    # The data type doesn't matter; only the raw contents are compared
    eq(buf2, buf5)
def test_buffer_from_numpy():
    # C-contiguous
    arr = np.arange(12, dtype=np.int8).reshape((3, 4))
    buf = pa.py_buffer(arr)
    assert buf.to_pybytes() == arr.tobytes()
    # F-contiguous; note that stride information is lost
    buf = pa.py_buffer(arr.T)
    assert buf.to_pybytes() == arr.tobytes()
    # Non-contiguous
    with pytest.raises(ValueError, match="not contiguous"):
        buf = pa.py_buffer(arr.T[::2])
def test_compress_decompress():
    INPUT_SIZE = 10000
    # np.ndarray.tostring() is a deprecated alias of tobytes()
    test_data = (np.random.randint(0, 255, size=INPUT_SIZE)
                 .astype(np.uint8)
                 .tobytes())
    test_buf = pa.py_buffer(test_data)

    codecs = ['lz4', 'snappy', 'gzip', 'zstd', 'brotli']
    for codec in codecs:
        compressed_buf = pa.compress(test_buf, codec=codec)
        compressed_bytes = pa.compress(test_data, codec=codec, asbytes=True)

        assert isinstance(compressed_bytes, bytes)

        decompressed_buf = pa.decompress(compressed_buf, INPUT_SIZE,
                                         codec=codec)
        decompressed_bytes = pa.decompress(compressed_bytes, INPUT_SIZE,
                                           codec=codec, asbytes=True)

        assert isinstance(decompressed_bytes, bytes)

        assert decompressed_buf.equals(test_buf)
        assert decompressed_bytes == test_data

        # The decompressed size must be given explicitly
        with pytest.raises(ValueError):
            pa.decompress(compressed_bytes, codec=codec)
def test_context_from_object(size):
    ctx = global_context
    arr, cbuf = make_random_buffer(size, target='device')
    dtype = arr.dtype

    # Creating device buffer from a CUDA host buffer
    hbuf = cuda.new_host_buffer(size * arr.dtype.itemsize)
    np.frombuffer(hbuf, dtype=dtype)[:] = arr
    cbuf2 = ctx.buffer_from_object(hbuf)
    assert cbuf2.size == cbuf.size
    arr2 = np.frombuffer(cbuf2.copy_to_host(), dtype=dtype)
    np.testing.assert_equal(arr, arr2)

    # Creating device buffer from a device buffer
    cbuf2 = ctx.buffer_from_object(cbuf2)
    assert cbuf2.size == cbuf.size
    arr2 = np.frombuffer(cbuf2.copy_to_host(), dtype=dtype)
    np.testing.assert_equal(arr, arr2)

    # Trying to create a device buffer from a Buffer
    with pytest.raises(pa.ArrowTypeError,
                       match=('buffer is not backed by a CudaBuffer')):
        ctx.buffer_from_object(pa.py_buffer(b"123"))

    # Trying to create a device buffer from a numpy.array
    with pytest.raises(pa.ArrowTypeError,
                       match=('cannot create device buffer view from'
                              ' `<class \'numpy.ndarray\'>` object')):
        ctx.buffer_from_object(np.array([1, 2, 3]))
def test_buffer_protocol_respects_immutability():
    # ARROW-3228: NumPy's frombuffer ctor determines whether a buffer-like
    # object is mutable by first requesting a writable buffer via
    # PyObject_GetBuffer. If that fails, it assumes that the object is
    # immutable
    a = b'12345'
    arrow_ref = pa.py_buffer(a)
    numpy_ref = np.frombuffer(arrow_ref, dtype=np.uint8)
    assert not numpy_ref.flags.writeable
def test_array_from_buffers():
    values_buf = pa.py_buffer(np.int16([4, 5, 6, 7]))
    # Validity bitmap 0b00001101: bit 1 is unset, so the second slot is null
    nulls_buf = pa.py_buffer(np.uint8([0b00001101]))
    arr = pa.Array.from_buffers(pa.int16(), 4, [nulls_buf, values_buf])
    assert arr.type == pa.int16()
    assert arr.to_pylist() == [4, None, 6, 7]

    arr = pa.Array.from_buffers(pa.int16(), 4, [None, values_buf])
    assert arr.type == pa.int16()
    assert arr.to_pylist() == [4, 5, 6, 7]

    arr = pa.Array.from_buffers(pa.int16(), 3, [nulls_buf, values_buf],
                                offset=1)
    assert arr.type == pa.int16()
    assert arr.to_pylist() == [None, 6, 7]

    with pytest.raises(TypeError):
        pa.Array.from_buffers(pa.int16(), 3, [u'', u''], offset=1)
def test_buffer_address():
    b1 = b'some data!'
    b2 = bytearray(b1)
    b3 = bytearray(b1)

    buf1 = pa.py_buffer(b1)
    buf2 = pa.py_buffer(b1)
    buf3 = pa.py_buffer(b2)
    buf4 = pa.py_buffer(b3)

    assert buf1.address > 0
    assert buf1.address == buf2.address
    assert buf3.address != buf2.address
    assert buf4.address != buf3.address

    arr = np.arange(5)
    buf = pa.py_buffer(arr)
    assert buf.address == arr.ctypes.data
def test_buffer_memoryview():
    val = b'some data'

    buf = pa.py_buffer(val)
    assert isinstance(buf, pa.Buffer)
    assert not buf.is_mutable

    result = memoryview(buf)
    assert result == val
def test_buffer_bytearray():
    val = bytearray(b'some data')

    buf = pa.py_buffer(val)
    assert isinstance(buf, pa.Buffer)
    assert buf.is_mutable

    result = bytearray(buf)
    assert result == val
def test_buffer_to_numpy():
    # Make sure creating a numpy array from an arrow buffer works
    byte_array = bytearray(20)
    byte_array[0] = 42
    buf = pa.py_buffer(byte_array)
    array = np.frombuffer(buf, dtype="uint8")
    assert array[0] == byte_array[0]
    byte_array[0] += 1
    assert array[0] == byte_array[0]
    assert array.base == buf
def test_buffer_bytes():
    val = b'some data'

    buf = pa.py_buffer(val)
    assert isinstance(buf, pa.Buffer)
    assert not buf.is_mutable

    result = buf.to_pybytes()
    assert result == val
def test_output_stream_buffer():
    data = b"some test data\n" * 10 + b"eof\n"

    buf = bytearray(len(data))
    stream = pa.output_stream(pa.py_buffer(buf))
    stream.write(data)
    assert buf == data

    buf = bytearray(len(data))
    stream = pa.output_stream(memoryview(buf))
    stream.write(data)
    assert buf == data
def test_input_stream_buffer():
    data = b"some test data\n" * 10 + b"eof\n"
    for arg in [pa.py_buffer(data), memoryview(data)]:
        stream = pa.input_stream(arg)
        assert stream.read() == data

    gz_data = gzip_compress(data)
    stream = pa.input_stream(memoryview(gz_data))
    assert stream.read() == gz_data
    stream = pa.input_stream(memoryview(gz_data), compression='gzip')
    assert stream.read() == data
def test_uninitialized_buffer():
    # ARROW-2039: calling Buffer() directly creates an uninitialized object
    check_uninitialized = partial(pytest.raises,
                                  ReferenceError, match="uninitialized")
    buf = pa.Buffer()
    with check_uninitialized():
        buf.size
    with check_uninitialized():
        len(buf)
    with check_uninitialized():
        buf.is_mutable
    with check_uninitialized():
        buf.parent
    with check_uninitialized():
        buf.to_pybytes()
    with check_uninitialized():
        memoryview(buf)
    with check_uninitialized():
        buf.equals(pa.py_buffer(b''))
    with check_uninitialized():
        pa.py_buffer(b'').equals(buf)
def test_buffer_getitem():
    data = bytearray(b'some data!')
    buf = pa.py_buffer(data)

    n = len(data)
    for ix in range(-n, n - 1):
        assert buf[ix] == data[ix]

    with pytest.raises(IndexError):
        buf[n]

    with pytest.raises(IndexError):
        buf[-n - 1]
def do_action(self, context, action):
    if action.type == "clear":
        raise NotImplementedError(
            "{} is not implemented.".format(action.type))
    elif action.type == "healthcheck":
        pass
    elif action.type == "shutdown":
        yield pyarrow.flight.Result(pyarrow.py_buffer(b'Shutdown!'))
        # Shut down on background thread to avoid blocking current
        # request
        threading.Thread(target=self._shutdown).start()
    else:
        raise KeyError(f"Unknown action {action.type!r}")
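# Hedged sketch of a client-side call for the "shutdown" action handled
# above. The endpoint URI and the running-server setup are assumptions,
# not part of the snippet.
import pyarrow.flight

client = pyarrow.flight.connect("grpc://localhost:8815")
for result in client.do_action(pyarrow.flight.Action("shutdown", b"")):
    print(result.body.to_pybytes())  # b'Shutdown!'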
def test_deserialize_buffer_in_different_process():
    import tempfile

    f = tempfile.NamedTemporaryFile(delete=False)
    b = pa.serialize(pa.py_buffer(b'hello')).to_buffer()
    f.write(b.to_pybytes())
    f.close()

    subprocess_env = test_util.get_modified_env_with_pythonpath()

    dir_path = os.path.dirname(os.path.realpath(__file__))
    python_file = os.path.join(dir_path, 'deserialize_buffer.py')
    subprocess.check_call([sys.executable, python_file, f.name],
                          env=subprocess_env)
def test_buffer_bytes():
    val = b'some data'

    buf = pa.py_buffer(val)
    assert isinstance(buf, pa.Buffer)
    assert not buf.is_mutable

    result = buf.to_pybytes()
    assert result == val

    # Check that buffers survive a pickle roundtrip
    result_buf = pickle.loads(pickle.dumps(buf))
    result = result_buf.to_pybytes()
    assert result == val
def test_buffer_bytes():
    val = b'some data'

    buf = pa.py_buffer(val)
    assert isinstance(buf, pa.Buffer)
    assert not buf.is_mutable
    assert buf.is_cpu

    result = buf.to_pybytes()
    assert result == val

    # Check that buffers survive a pickle roundtrip
    result_buf = pickle.loads(pickle.dumps(buf))
    result = result_buf.to_pybytes()
    assert result == val
def to_arrow(self):
    offsets = self.offsets.to_arrow()
    elements = (
        pa.nulls(len(self.elements))
        if len(self.elements) == self.elements.null_count
        else self.elements.to_arrow()
    )
    pa_type = pa.list_(elements.type)

    if self.nullable:
        nbuf = self.mask.to_host_array().view("int8")
        nbuf = pa.py_buffer(nbuf)
        buffers = (nbuf, offsets.buffers()[1])
    else:
        buffers = offsets.buffers()

    return pa.ListArray.from_buffers(pa_type, len(self), buffers,
                                     children=[elements])
def test_buffer_memoryview_is_immutable():
    val = b'some data'

    buf = pa.py_buffer(val)
    assert not buf.is_mutable
    assert isinstance(buf, pa.Buffer)

    result = memoryview(buf)
    assert result.readonly

    with pytest.raises(TypeError) as exc:
        result[0] = b'h'
    assert 'cannot modify read-only' in str(exc.value)

    b = bytes(buf)
    # bytes objects never support item assignment, regardless of the source
    with pytest.raises(TypeError):
        b[0] = b'h'
def _apply_binary_str_array(
    a: pa.Array,
    b: pa.Array,
    *,
    func: Callable,
    output_dtype,
    parallel: bool = False,
):
    out = np.empty(len(a), dtype=output_dtype)

    offsets_buffer_a, data_buffer_a = _extract_string_buffers(a)
    offsets_buffer_b, data_buffer_b = _extract_string_buffers(b)

    if a.null_count == 0 and b.null_count == 0:
        if parallel:
            call = _apply_no_nulls_parallel
        else:
            call = _apply_no_nulls
        call(
            func,
            len(a),
            offsets_buffer_a,
            data_buffer_a,
            offsets_buffer_b,
            data_buffer_b,
            out,
        )
        return pa.array(out)
    else:
        valid = _merge_valid_bitmaps(a, b)
        if parallel:
            call = _apply_with_nulls_parallel
        else:
            call = _apply_with_nulls
        call(
            func,
            len(a),
            valid,
            offsets_buffer_a,
            data_buffer_a,
            offsets_buffer_b,
            data_buffer_b,
            out,
        )
        buffers = [pa.py_buffer(x) for x in [valid, out]]
        return pa.Array.from_buffers(pa.int64(), len(out), buffers)
def adql_query(self, query_str, download=False, threads=1):
    """
    Query the ASAS-SN Sky Patrol Input Catalogs with an ADQL string.
    See README.md for more on accepted ADQL context and functions.

    :param query_str: ADQL query string
    :param download: return full light curves if True,
                     return catalog information if False
    :param threads: number of real threads to use for pulling
                    light curves from the server
    :return: if 'download' is False, a pandas DataFrame containing
             catalog information of targets; else a LightCurveCollection
    """
    # Check inputs
    if type(download) is not bool:
        raise ValueError("'download' must be boolean value")
    if type(threads) is not int:
        raise ValueError("'threads' must be integer value")
    if type(query_str) is not str:
        raise ValueError("'query_str' must be string value")

    # Trim ADQL input
    query_str = re.sub(' +', ' ', query_str).replace("\n", "")
    query_bytes = encodebytes(bytes(query_str, encoding='utf-8')).decode()

    # Query Flask API with SQL bytes
    url = f"http://asassn-lb01.ifa.hawaii.edu:9006/lookup_sql/{query_bytes}"
    response = requests.post(url, json={'format': 'arrow'})

    # Check response
    if response.status_code == 400:
        error = json.loads(response.content)['error_text']
        raise RuntimeError(error)

    # Deserialize from arrow
    buff = pa.py_buffer(response.content)
    tar_df = pa.deserialize(buff)
    self.index = tar_df

    if download is False:
        return tar_df
    else:
        tar_ids = list(tar_df['asas_sn_id'])
        return self._get_curves(tar_ids, "extrasolar", threads)
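# Hypothetical call pattern for adql_query; the client class name and the
# table/column names below are illustrative assumptions, not part of the
# snippet above.
client = SkyPatrolClient()
catalog_df = client.adql_query(
    "SELECT asas_sn_id, ra_deg, dec_deg FROM master_list LIMIT 10")
print(catalog_df.head())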
def test_config(self):
    myconfig = Configuration()
    myconfig.uuid = str(uuid.uuid4())
    myconfig.name = f"{myconfig.uuid}.config.dat"

    configinfo = ConfigObjectInfo()
    configinfo.created.GetCurrentTime()

    bufconfig = pa.py_buffer(myconfig.SerializeToString())

    with tempfile.TemporaryDirectory() as dirpath:
        _path = dirpath + "/test"
        store = BaseObjectStore(
            str(_path), "test")  # wrapper to the CronusStore message
        config_uuid = store.register_content(myconfig, configinfo).uuid
        store.put(config_uuid, myconfig)

        aconfig = Configuration()
        store.get(config_uuid, aconfig)
        self.assertEqual(myconfig.name, aconfig.name)
        self.assertEqual(myconfig.uuid, aconfig.uuid)
def test_serialize_to_components_invalid_cases():
    buf = pa.py_buffer(b'hello')

    components = {
        'num_tensors': 0,
        'num_buffers': 1,
        'data': [buf]
    }

    with pytest.raises(pa.ArrowInvalid):
        pa.deserialize_components(components)

    components = {
        'num_tensors': 1,
        'num_buffers': 0,
        'data': [buf, buf]
    }

    with pytest.raises(pa.ArrowInvalid):
        pa.deserialize_components(components)
def sampler(self):
    while self.nsamples > 0:
        self.__logger.info("%s: Generating datum " %
                           (self.__class__.__name__))
        data = self.gen_chunk()
        self.__logger.debug(
            "%s: type data: %s" % (self.__class__.__name__, type(data))
        )
        fileinfo = FileObjectInfo()
        fileinfo.type = 2
        fileinfo.partition = self.name
        job_id = f"{self.gate.meta.job_id}_sample_{self.nsamples}"
        ds_id = self.gate.meta.parentset_id
        id_ = self.gate.store.register_content(
            data,
            fileinfo,
            dataset_id=ds_id,
            partition_key=self.name,
            job_id=job_id,
        ).uuid
        buf = pa.py_buffer(data)
        self.gate.store.put(id_, buf)
        yield id_
        self.nsamples -= 1
        self.__logger.debug("Batch %i", self.nsamples)
def create_reader(url, compression=None):
    parts = urllib.parse.urlparse(url)

    # S3
    if parts.scheme == 's3':
        bucket = parts.netloc
        key = parts.path[1:]
        obj = Driver.s3_client().get_object(Bucket=bucket, Key=key)
        buf = obj['Body'].read()
        strm = pyarrow.input_stream(pyarrow.py_buffer(buf),
                                    compression=compression)
        return pyarrow.RecordBatchStreamReader(strm)
    # File System
    elif parts.scheme == 'file':
        path = os.path.join(parts.netloc, parts.path)
        strm = pyarrow.input_stream(path, compression=compression)
        return pyarrow.RecordBatchStreamReader(strm)
    else:
        raise Exception('URL {} not supported'.format(url))
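# Minimal sketch of the buffer-backed reading path above, with pure pyarrow
# (no S3): serialize a batch to IPC bytes, then read it back through
# input_stream + RecordBatchStreamReader.
import pyarrow

rb = pyarrow.RecordBatch.from_pydict({"x": [1, 2, 3]})
sink = pyarrow.BufferOutputStream()
with pyarrow.ipc.new_stream(sink, rb.schema) as w:
    w.write_batch(rb)
ipc_bytes = sink.getvalue().to_pybytes()

strm = pyarrow.input_stream(pyarrow.py_buffer(ipc_bytes))
reader = pyarrow.RecordBatchStreamReader(strm)
assert reader.read_all().num_rows == 3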
def test_menu(self):
    testmenu = Menu_pb()
    print(type(testmenu))
    print(testmenu)
    testmenu.uuid = str(uuid.uuid4())
    testmenu.name = f"{testmenu.uuid}.menu.dat"

    menuinfo = MenuObjectInfo()
    menuinfo.created.GetCurrentTime()

    bufmenu = pa.py_buffer(testmenu.SerializeToString())

    with tempfile.TemporaryDirectory() as dirpath:
        _path = dirpath + "/test"
        store = BaseObjectStore(
            str(_path), "test")  # wrapper to the CronusStore message
        menu_uuid = store.register_content(testmenu, menuinfo).uuid
        store.put(menu_uuid, testmenu)

        amenu = Menu_pb()
        store.get(menu_uuid, amenu)
        self.assertEqual(testmenu.name, amenu.name)
        self.assertEqual(testmenu.uuid, amenu.uuid)
def all_true_like(arr: pa.Array) -> pa.Array:
    """Return an all-True boolean array with the same size as the input
    and the same valid bitmap."""
    valid_buffer = arr.buffers()[0]
    if valid_buffer:
        valid_buffer = valid_buffer.slice(arr.offset // 8)

    output_offset = arr.offset % 8
    output_length = len(arr) + output_offset

    output_size = output_length // 8
    if output_length % 8 > 0:
        output_size += 1
    output = np.full(output_size, fill_value=255, dtype=np.uint8)

    return pa.Array.from_buffers(
        pa.bool_(),
        len(arr),
        [valid_buffer, pa.py_buffer(output)],
        arr.null_count,
        output_offset,
    )
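# A minimal check of the all_true_like contract above: the result keeps the
# input's validity bitmap, and every valid slot becomes True. Assumes
# pyarrow as pa, as elsewhere in these snippets.
import pyarrow as pa

arr = pa.array([True, None, False, True])
assert all_true_like(arr).equals(pa.array([True, None, True, True]))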
def _text_contains_case_sensitive(data: pa.Array, pat: str) -> pa.Array:
    """
    Check for each element in the data whether it contains the pattern
    ``pat``.

    This implementation does basic byte-by-byte comparison and is
    independent of any locales or encodings.
    """
    # Convert to UTF-8 bytes
    pat_bytes: bytes = pat.encode()

    # Initialise boolean (bit-packed) output array.
    output_size = len(data) // 8
    if len(data) % 8 > 0:
        output_size += 1
    output = np.empty(output_size, dtype=np.uint8)
    if len(data) % 8 > 0:
        # Zero trailing bits
        output[-1] = 0

    offsets, data_buffer = _extract_string_buffers(data)

    if data.null_count == 0:
        valid_buffer = None
        _text_contains_case_sensitive_nonnull(
            len(data), offsets, data_buffer, pat_bytes, output
        )
    else:
        valid = _buffer_to_view(data.buffers()[0])
        _text_contains_case_sensitive_nulls(
            len(data), valid, data.offset, offsets, data_buffer,
            pat_bytes, output
        )
        valid_buffer = data.buffers()[0].slice(data.offset // 8)
        if data.offset % 8 != 0:
            valid_buffer = shift_unaligned_bitmap(
                valid_buffer, data.offset % 8, len(data)
            )

    return pa.Array.from_buffers(
        pa.bool_(), len(data), [valid_buffer, pa.py_buffer(output)],
        data.null_count
    )
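# Cross-check sketch: modern pyarrow ships equivalent semantics as a compute
# kernel (match_substring), so the helper above can be validated against it.
# Assumes the numba-jitted helpers referenced above are importable.
import pyarrow as pa
import pyarrow.compute as pc

data = pa.array(["hello", None, "world"])
expected = pc.match_substring(data, "or").to_pylist()  # [False, None, True]
assert _text_contains_case_sensitive(data, "or").to_pylist() == expected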
def or_na(arr: pa.Array) -> pa.Array:
    """Apply ``array | NA`` with a boolean pyarrow.Array."""
    output_length = len(arr) // 8
    if len(arr) % 8 != 0:
        output_length += 1

    if arr.null_count == 0:
        return pa.Array.from_buffers(
            pa.bool_(),
            len(arr),
            [arr.buffers()[1], arr.buffers()[1]],
            null_count=-1,
            offset=arr.offset,
        )
    else:
        output = np.zeros(output_length, dtype=np.uint8)
        null_count = _or_na(len(arr), arr.offset, arr.buffers()[0],
                            arr.buffers()[1], output)
        buf = pa.py_buffer(output)
        return pa.Array.from_buffers(pa.bool_(), len(arr), [buf, buf],
                                     null_count)
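# Quick sanity check of the Kleene logic implemented by or_na: True | NA is
# True, while False | NA is NA. The null-free branch exercised below is pure
# pyarrow, so no numba helper is needed for this case.
import pyarrow as pa

assert or_na(pa.array([True, False])).to_pylist() == [True, None]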
def trim_buffers(ar):
    # There are cases where memcopies or modifications are made
    # (large_string_to_string); in those cases we don't want to work on the
    # full array, so we get rid of the offset if possible.
    if ar.type == pa.string() or ar.type == pa.large_string():
        if isinstance(ar, pa.ChunkedArray):
            return ar  # let's assume chunked arrays are fine
        null_bitmap, offsets_buffer, bytes = ar.buffers()
        if ar.type == pa.string():
            offsets = np.frombuffer(offsets_buffer, np.int32,
                                    len(ar) + 1 + ar.offset)
        else:
            offsets = np.frombuffer(offsets_buffer, np.int64,
                                    len(ar) + 1 + ar.offset)
        # Because it is difficult to slice bits, only whole bytes of the
        # validity bitmap can be dropped
        new_offset = ar.offset % 8
        remove_offset = (ar.offset // 8) * 8
        first_offset = offsets[remove_offset]
        new_offsets = offsets[remove_offset:] - first_offset
        if null_bitmap:
            null_bitmap = null_bitmap.slice(ar.offset // 8)
        new_offsets_buffer = pa.py_buffer(new_offsets)
        bytes = bytes.slice(first_offset)
        ar = pa.Array.from_buffers(ar.type, len(ar),
                                   [null_bitmap, new_offsets_buffer, bytes],
                                   offset=new_offset)
    return ar
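# Small demonstration of trim_buffers on a sliced string array: the slice
# has offset 10, so one full bitmap byte (8 slots) worth of offset can be
# dropped, leaving only the unavoidable remainder of 2.
import numpy as np
import pyarrow as pa

arr = pa.array([str(i) for i in range(12)]).slice(10)
trimmed = trim_buffers(arr)
assert trimmed.offset == 2
assert trimmed.to_pylist() == ["10", "11"]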
def _make_mask(self):
    assert self.data.length is not None
    if self.data.type.equals(pyarrow.null()):
        self.data.null_count = self.data.length
        return None
    if self.data.length == 0:
        self.data.null_count = 0
        return None
    bits = self._decompress(self._doc[MASK])
    vals = numpy.unpackbits(
        numpy.ndarray(len(bits), numpy.uint8, bits), bitorder='big')
    self.data.null_count = self.data.length - numpy.sum(vals)
    if self.data.null_count == 0:
        return None
    mask = numpy.packbits(vals, bitorder='little')
    return pyarrow.py_buffer(mask.tobytes())
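# Standalone sketch of the bit-order conversion done in _make_mask: the
# stored mask is packed most-significant-bit first, while Arrow validity
# bitmaps are least-significant-bit first.
import numpy

bits = numpy.packbits([1, 0, 1, 1], bitorder='big')  # array([0b10110000])
vals = numpy.unpackbits(bits, bitorder='big')        # [1, 0, 1, 1, 0, 0, 0, 0]
mask = numpy.packbits(vals, bitorder='little')       # array([0b00001101])
assert mask[0] == 0b00001101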
def read_vineyard_dataframe(vineyard_socket, path, storage_options,
                            read_options, proc_num, proc_index):
    client = vineyard.connect(vineyard_socket)
    builder = DataframeStreamBuilder(client)

    if storage_options:
        raise ValueError("Reading vineyard dataframes does not currently "
                         "support storage options")
    builder["header_row"] = "1" if read_options.get("header_row", False) else "0"
    builder["delimiter"] = bytes(
        read_options.get("delimiter", ","), "utf-8"
    ).decode("unicode_escape")

    stream = builder.seal(client)
    client.persist(stream)
    ret = {"type": "return", "content": repr(stream.id)}
    print(json.dumps(ret), flush=True)

    name = urlparse(path).netloc
    # The "name" part in the URL can be a name, or an ObjectID for convenience.
    try:
        df_id = client.get_name(name)
    except Exception:
        df_id = vineyard.ObjectID(name)
    dataframes = client.get(df_id)

    writer = stream.open_writer(client)
    for df in dataframes:
        rb = pa.RecordBatch.from_pandas(df)
        sink = pa.BufferOutputStream()
        rb_writer = pa.ipc.new_stream(sink, rb.schema)
        rb_writer.write_batch(rb)
        rb_writer.close()
        buf = sink.getvalue()
        chunk = writer.next(buf.size)
        buf_writer = pa.FixedSizeBufferWriter(pa.py_buffer(chunk))
        buf_writer.write(buf)
        buf_writer.close()
    writer.finish()
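# Self-contained sketch of the IPC-into-fixed-size-buffer pattern used in
# the loop above, with pure pyarrow (no vineyard). The destination bytearray
# stands in for the chunk handed out by the stream writer.
import pyarrow as pa

rb = pa.RecordBatch.from_pydict({"x": [1, 2, 3]})
sink = pa.BufferOutputStream()
with pa.ipc.new_stream(sink, rb.schema) as w:
    w.write_batch(rb)
payload = sink.getvalue()

dest = bytearray(payload.size)
buf_writer = pa.FixedSizeBufferWriter(pa.py_buffer(dest))
buf_writer.write(payload)
buf_writer.close()
assert bytes(dest) == payload.to_pybytes()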
def apply(bytes, parameters=None):
    """
    Apply the deserialization to the bytes produced by Pyarrow serialization

    Parameters
    --------------
    bytes
        Bytes
    parameters
        Parameters of the algorithm

    Returns
    --------------
    deser
        Deserialized object
    """
    if parameters is None:
        parameters = {}

    buffer = pyarrow.py_buffer(bytes)
    list_events = pyarrow.deserialize(buffer)
    for i in range(len(list_events)):
        list_events[i] = Event(list_events[i])
    return EventStream(list_events)
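# Hypothetical round trip for apply() above. Assumes an older pyarrow that
# still ships serialize()/deserialize() (removed in recent releases), with
# Event/EventStream in scope as in the snippet.
import pyarrow

payload = pyarrow.serialize(
    [{"concept:name": "A"}, {"concept:name": "B"}]).to_buffer().to_pybytes()
stream = apply(payload)
assert len(stream) == 2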
def test_serialize_to_components_invalid_cases():
    buf = pa.py_buffer(b'hello')

    components = {
        'num_tensors': 0,
        'num_sparse_tensors': {'coo': 0, 'csr': 0},
        'num_ndarrays': 0,
        'num_buffers': 1,
        'data': [buf]
    }

    with pytest.raises(pa.ArrowInvalid):
        pa.deserialize_components(components)

    components = {
        'num_tensors': 0,
        'num_sparse_tensors': {'coo': 0, 'csr': 0},
        'num_ndarrays': 1,
        'num_buffers': 0,
        'data': [buf, buf]
    }

    with pytest.raises(pa.ArrowInvalid):
        pa.deserialize_components(components)
def serialize_buffer_class(obj):
    return pa.py_buffer(b"hello")
def test_buffer_hex(val, expected_hex_buffer):
    buf = pa.py_buffer(val)
    assert buf.hex() == expected_hex_buffer
def read_bytes(self, b, **kwargs):
    return self.read_csv(pa.py_buffer(b), **kwargs)
def make_buffer(bytes_obj):
    return bytearray(pa.py_buffer(bytes_obj))
def test_buffer_invalid():
    with pytest.raises(TypeError,
                       match="(bytes-like object|buffer interface)"):
        pa.py_buffer(None)
def test_buffer_hashing():
    # Buffers are unhashable
    with pytest.raises(TypeError, match="unhashable"):
        hash(pa.py_buffer(b'123'))