Beispiel #1
0
    def test_serialize_read_message(self):
        """Round-trip a message through ``serialize`` and ``pa.read_message``.

        The serialized form of the first example message is read back three
        ways -- from the buffer itself, from a ``pa.BufferReader`` wrapping
        it, and from its Python ``bytes`` copy -- and every result must
        compare equal to the original message.
        """
        _, example_messages = self._get_example_messages()

        original = example_messages[0]
        serialized = original.serialize()

        # All reads happen before any comparison, one per input flavour.
        restored_messages = [
            pa.read_message(serialized),
            pa.read_message(pa.BufferReader(serialized)),
            pa.read_message(serialized.to_pybytes()),
        ]

        for restored in restored_messages:
            assert original.equals(restored)
Beispiel #2
0
    def test_serialize_read_message(self):
        """Check that a serialized message reads back equal to the original.

        ``pa.read_message`` is exercised with three kinds of input built from
        the same serialized message: the buffer returned by ``serialize()``,
        a ``pa.BufferReader`` over that buffer, and the buffer's contents as
        Python ``bytes``.
        """
        _, all_messages = self._get_example_messages()
        first_message = all_messages[0]
        payload = first_message.serialize()

        from_buffer = pa.read_message(payload)

        buffer_reader = pa.BufferReader(payload)
        from_reader = pa.read_message(buffer_reader)

        payload_bytes = payload.to_pybytes()
        from_bytes = pa.read_message(payload_bytes)

        # Compare only after every read has completed.
        for candidate in (from_buffer, from_reader, from_bytes):
            assert first_message.equals(candidate)
Beispiel #3
0
def test_message_read_from_compressed(example_messages):
    """Read each example message back from a gzip-compressed stream.

    Part of ARROW-5910.

    Every message is serialized into a gzip-compressing output stream that
    writes into an in-memory sink; the sink's contents are then read through
    a gzip-decompressing input stream and the resulting message must equal
    the one that was written.
    """
    _, all_messages = example_messages

    for original in all_messages:
        sink = pa.BufferOutputStream()
        # Leaving the ``with`` block closes the compressing stream before the
        # sink's contents are fetched below.
        with pa.output_stream(sink, compression='gzip') as gzip_sink:
            original.serialize_to(gzip_sink)

        gzip_payload = sink.getvalue()
        gzip_source = pa.input_stream(gzip_payload, compression='gzip')

        roundtripped = pa.read_message(gzip_source)
        assert roundtripped.equals(original)
Beispiel #4
0
def _load_data(buf, schema):
    """
    Build a `pandas.DataFrame` from a buffer holding a serialized message.

    The buffer is read as a single message, the message is decoded into a
    record batch using ``schema``, and the batch is converted to pandas.

    Parameters
    ----------
    buf : pyarrow.Buffer
        Buffer to read the message from (callers are expected to pass one
        that was written to shared memory; this is not checked here).
    schema : pyarrow.Schema
        Schema passed to ``read_record_batch`` to decode the message.

    Returns
    -------
    df : pandas.DataFrame
    """
    # Imported here rather than at module level, as in the original code.
    import pyarrow as pa

    return pa.read_record_batch(pa.read_message(buf), schema).to_pandas()
Beispiel #5
0
def test_message_serialize_read_message(example_messages):
    """Round-trip a message via ``pa.read_message`` and check error cases.

    The first example message is serialized and read back from the buffer,
    from its ``bytes`` copy, and twice in a row from one reader that holds
    two concatenated copies of the serialized bytes.  Afterwards a two-byte
    input must raise ``pa.ArrowInvalid`` and a further read from the
    twice-consumed reader must raise ``EOFError``.
    """
    _, all_messages = example_messages

    original = all_messages[0]
    serialized = original.serialize()
    # Two back-to-back copies, so the same reader can be read from twice.
    doubled_reader = pa.BufferReader(serialized.to_pybytes() * 2)

    # Order is kept deliberately: the two reads on ``doubled_reader`` are
    # interleaved with the independent buffer / bytes reads.
    restored_messages = [
        pa.read_message(serialized),
        pa.read_message(doubled_reader),
        pa.read_message(serialized.to_pybytes()),
        pa.read_message(doubled_reader),
    ]

    for restored in restored_messages:
        assert original.equals(restored)

    # Input too short to be a message.
    with pytest.raises(pa.ArrowInvalid, match="Corrupted message"):
        pa.read_message(pa.BufferReader(b'ab'))

    # Third read from the reader that held only two copies.
    with pytest.raises(EOFError):
        pa.read_message(doubled_reader)
Beispiel #6
0
def _load_data(buf, schema, tdf=None):
    """
    Build a `pandas.DataFrame` from a buffer and attach TDF accessors to it.

    The buffer is read as a single message, decoded into a record batch
    using ``schema`` and converted to pandas.  ``set_tdf`` and ``get_tdf``
    are then bound to the resulting frame as methods, and ``set_tdf`` is
    called once with ``tdf``.

    Parameters
    ----------
    buf : pyarrow.Buffer
        Buffer to read the message from (callers are expected to pass one
        that was written to shared memory; this is not checked here).
    schema : pyarrow.Schema
        Schema passed to ``read_record_batch`` to decode the message.
    tdf : TDataFrame, optional
        Value handed to the frame's ``set_tdf``; defaults to ``None``.

    Returns
    -------
    df : pandas.DataFrame
        The loaded frame, carrying bound ``set_tdf`` / ``get_tdf`` methods.
    """
    batch = pa.read_record_batch(pa.read_message(buf), schema)
    frame = batch.to_pandas()

    # Bind the helpers (defined outside this function) to this specific
    # frame so they can be called as ``frame.set_tdf(...)`` and
    # ``frame.get_tdf()``.
    frame.set_tdf = MethodType(set_tdf, frame)
    frame.get_tdf = MethodType(get_tdf, frame)

    frame.set_tdf(tdf)
    return frame