Ejemplo n.º 1
0
def format_timestamp_array(arr: pa.Array) -> pa.Array:
    """Build a PyArrow utf8 array from a nanosecond timestamp array.

    Every non-null value is rendered with ``_ns_to_iso8601`` (ISO8601, as
    precise as needed). Null slots keep their position and contribute zero
    bytes to the data buffer.

    The output Array has the same length as the input. It reuses the input's
    validity bitmap and allocates two new, contiguous buffers (offsets and
    UTF-8 data).

    Args:
        arr: Timestamp array whose type unit is ``"ns"``.

    Returns:
        A ``pa.StringArray`` with one entry per input value.

    Raises:
        NotImplementedError: If ``arr.type.unit`` is not ``"ns"``.
    """
    # Validate before touching the buffers: it is cheap and fails early.
    if arr.type.unit != "ns":
        raise NotImplementedError("TODO handle non-ns")  # pragma: no cover

    # NOTE(review): arr.offset is never consulted here, so this presumably
    # expects an unsliced array (or _num_iter compensates) -- confirm.
    valid_buf, num_buf = arr.buffers()

    # "q" is always 8 bytes. The C `long` code "l" is only 4 bytes on LLP64
    # platforms (Windows), where it would split each int64 into two values.
    nums = memoryview(num_buf).cast("q")
    num_iter = _num_iter(valid_buf, nums)

    offset = 0
    # 4-byte offsets; bit-identical to Arrow's int32 offsets below 2**31.
    out_offsets = array.array("I")
    out_utf8 = io.BytesIO()

    for num in num_iter:
        # At each number, output the _start_ offset of that number
        out_offsets.append(offset)
        if num is not None:
            formatted, n = codecs.readbuffer_encode(_ns_to_iso8601(num))
            out_utf8.write(formatted)
            offset += n

    # Arrow needs len(arr) + 1 offsets: the last one closes the final value.
    out_offsets.append(offset)

    return pa.StringArray.from_buffers(
        length=len(arr),
        value_offsets=pa.py_buffer(out_offsets.tobytes()),
        data=pa.py_buffer(out_utf8.getvalue()),
        null_bitmap=valid_buf,
        null_count=arr.null_count,
    )
Ejemplo n.º 2
0
 def test_empty(self):
     self.assertEqual(codecs.readbuffer_encode(""), ("", 0))
Ejemplo n.º 3
0
 def test_array(self):
     import array
     self.assertEqual(codecs.readbuffer_encode(array.array("c", "spam")),
                      ("spam", 4))
Ejemplo n.º 4
0
def _date32_to_ymd(day: int) -> tuple:
    """Convert days since 1970-01-01 to a proleptic Gregorian (year, month, day).

    Pure integer arithmetic, so it gives the same answer on every platform
    for any date32 value, including pre-epoch days and years <= 0
    (astronomical numbering: year 0 precedes year 1).
    """
    # Re-base onto 0000-03-01 so the leap day is the last day of each "year".
    era, day_of_era = divmod(day + 719468, 146097)  # 146097 days per 400 years
    year_of_era = (
        day_of_era
        - day_of_era // 1460
        + day_of_era // 36524
        - day_of_era // 146096
    ) // 365
    day_of_year = day_of_era - (
        365 * year_of_era + year_of_era // 4 - year_of_era // 100
    )
    shifted_month = (5 * day_of_year + 2) // 153  # 0 = March ... 11 = February
    day_of_month = day_of_year - (153 * shifted_month + 2) // 5 + 1
    month = shifted_month + 3 if shifted_month < 10 else shifted_month - 9
    # January and February belong to the following calendar year.
    year = year_of_era + era * 400 + (1 if month <= 2 else 0)
    return year, month, day_of_month


def format_date_array(arr: pa.Array, unit: DateUnit) -> pa.Array:
    """Build a PyArrow utf8 array from a date32 array.

    The output Array will have the same length as the input. It reuses the
    input's validity bitmap and allocates two new, contiguous buffers
    (offsets and UTF-8 data).

    Formats (for date "2022-08-01", a Monday):

    * day: "2022-08-01"
    * week: "2022-08-01"
    * month: "2022-08"
    * quarter: "2022 Q3"
    * year: "2022"

    No rounding is performed: each stored day is printed at the precision of
    ``unit``. Any unit other than "year", "quarter" or "month" is printed as
    a full date. Years are not zero-padded.

    Args:
        arr: date32 array (int32 days since 1970-01-01).
        unit: Precision to print at.

    Returns:
        A ``pa.StringArray`` with one entry per input value; nulls stay null.
    """
    # NOTE(review): arr.offset is never consulted here, so this presumably
    # expects an unsliced array (or _num_iter compensates) -- confirm.
    valid_buf, num_buf = arr.buffers()
    nums = memoryview(num_buf).cast("i")  # i = int32
    num_iter = _num_iter(valid_buf, nums)

    offset = 0
    # 4-byte offsets; bit-identical to Arrow's int32 offsets below 2**31.
    out_offsets = array.array("I")
    out_utf8 = io.BytesIO()

    # date32 allows negative years; Python `datetime.date` doesn't, and
    # `time.gmtime` is platform-dependent before the epoch and far in the
    # future. Use integer calendar arithmetic instead of either.
    if unit == "year":

        def _format(day: int) -> str:
            return str(_date32_to_ymd(day)[0])

    elif unit == "quarter":

        def _format(day: int) -> str:
            year, month, _ = _date32_to_ymd(day)
            return f"{year} Q{(month + 2) // 3}"

    elif unit == "month":

        def _format(day: int) -> str:
            year, month, _ = _date32_to_ymd(day)
            return f"{year}-{month:02d}"

    else:

        def _format(day: int) -> str:
            year, month, day_of_month = _date32_to_ymd(day)
            return f"{year}-{month:02d}-{day_of_month:02d}"

    for num in num_iter:
        # At each number, output the _start_ offset of that number
        out_offsets.append(offset)
        if num is not None:
            formatted, n = codecs.readbuffer_encode(_format(num))
            out_utf8.write(formatted)
            offset += n

    # Arrow needs len(arr) + 1 offsets: the last one closes the final value.
    out_offsets.append(offset)

    return pa.StringArray.from_buffers(
        length=len(arr),
        value_offsets=pa.py_buffer(out_offsets.tobytes()),
        data=pa.py_buffer(out_utf8.getvalue()),
        null_bitmap=valid_buf,
        null_count=arr.null_count,
    )
Ejemplo n.º 5
0
 def test_empty(self):
     self.assertEqual(codecs.readbuffer_encode(""), ("", 0))
Ejemplo n.º 6
0
 def test_array(self):
     import array
     self.assertEqual(
         codecs.readbuffer_encode(array.array("c", "spam")),
         ("spam", 4)
     )
Ejemplo n.º 7
0
 def update_event(self, inp=-1):
     """Run ``codecs.readbuffer_encode`` on inputs 0 and 1; publish on output 0.

     Input 0 is passed as the data argument and input 1 as the errors
     argument. ``inp`` is accepted but not used by this method.
     """
     data = self.input(0)
     errors = self.input(1)
     encoded = codecs.readbuffer_encode(data, errors)
     self.set_output_val(0, encoded)