Example #1
0
    def _preprocess_host_value(self, value, dtype):
        """Normalize a host value and its dtype ahead of scalar construction.

        Parameters
        ----------
        value : object
            Host-side value. Lists and dicts are treated as nested
            (list / struct) values and returned unchanged; any other value
            is passed through ``to_cudf_compatible_scalar``.
        dtype : dtype-like or None
            Requested dtype, or ``None`` to infer it from ``value``.

        Returns
        -------
        tuple
            ``(value, dtype)``. ``value`` is ``NA`` when a null was requested
            for a list/struct dtype, or when ``_is_null_host_scalar``
            reported the input as null.

        Raises
        ------
        TypeError
            If a list is passed together with an explicit ``dtype``, if a
            null value is passed without a ``dtype`` (other than a
            datetime/timedelta carrying a concrete unit), or if a generic
            ``NaT`` is passed without a ``dtype``.
        ValueError
            If ``dtype`` is a ``ListDtype`` / ``StructDtype`` and ``value``
            is neither a list / dict nor one of ``None`` / ``NA``.
        """
        # Nullness is decided from the caller's original value, before any of
        # the conversions below get a chance to change its representation.
        valid = not cudf._lib.scalar._is_null_host_scalar(value)

        if isinstance(value, list):
            if dtype is not None:
                raise TypeError("Lists may not be cast to a different dtype")
            dtype = ListDtype.from_arrow(
                pa.infer_type([value], from_pandas=True))
            return value, dtype
        if isinstance(dtype, ListDtype):
            # Identity checks rather than ``value not in {None, NA}``: set
            # membership hashes ``value``, so an unhashable input (e.g. a
            # NumPy array) would fail with an unrelated "unhashable type"
            # TypeError instead of the intended ValueError.
            if value is not None and value is not NA:
                raise ValueError(f"Can not coerce {value} to ListDtype")
            return NA, dtype

        if isinstance(value, dict):
            # Unlike lists, an explicitly supplied dtype is accepted as-is.
            if dtype is None:
                dtype = StructDtype.from_arrow(
                    pa.infer_type([value], from_pandas=True))
            return value, dtype
        if isinstance(dtype, StructDtype):
            # Same identity-based null test as the ListDtype case above.
            if value is not None and value is not NA:
                raise ValueError(f"Can not coerce {value} to StructDType")
            return NA, dtype

        if isinstance(dtype, Decimal64Dtype):
            # Round-trip through an Arrow decimal128 scalar built with the
            # requested precision and scale, then back to a Python object.
            value = pa.scalar(value,
                              type=pa.decimal128(dtype.precision,
                                                 dtype.scale)).as_py()
        if isinstance(value, decimal.Decimal) and dtype is None:
            dtype = Decimal64Dtype._from_decimal(value)

        value = to_cudf_compatible_scalar(value, dtype=dtype)

        if dtype is None:
            if valid:
                dtype = value.dtype
            elif isinstance(value, (np.datetime64, np.timedelta64)):
                # A null datetime/timedelta still carries a dtype, but only
                # when its unit is concrete.
                unit, _ = np.datetime_data(value)
                if unit == "generic":
                    raise TypeError(
                        "Cant convert generic NaT to null scalar")
                dtype = value.dtype
            else:
                raise TypeError(
                    "dtype required when constructing a null scalar")

        if not isinstance(dtype, Decimal64Dtype):
            dtype = cudf.dtype(dtype)

        if not valid:
            value = NA

        return value, dtype
Example #2
0
def test_infer_type_masked():
    """Check ``pa.infer_type`` results when a ``mask`` is supplied.

    Regression test for ARROW-5208.
    """
    values = ["foo", "bar", None, 2]

    # Only the trailing integer is masked.
    partially_masked = pa.infer_type(
        values, mask=[False, False, False, True])
    assert partially_masked == pa.utf8()

    # Every entry masked, this time passing the mask as a NumPy bool array.
    fully_masked = pa.infer_type(values, mask=np.ones(4, dtype=bool))
    assert fully_masked == pa.null()

    # Empty input with an empty mask.
    empty_inferred = pa.infer_type([], mask=[])
    assert empty_inferred == pa.null()
def test_nested_ndarray_in_object_array():
    """Check conversion of object arrays whose elements are sequences.

    Regression test for ARROW-4350.
    """
    nested_type = pa.list_(pa.list_(pa.int64()))

    # Object array whose two elements are int64 ndarrays.
    ndarray_elems = np.empty(2, dtype=object)
    ndarray_elems[:] = [np.array([1, 2], dtype=np.int64),
                        np.array([2, 3], dtype=np.int64)]

    # Object array whose two elements are plain Python lists.
    list_elems = np.empty(2, dtype=object)
    list_elems[0] = [3, 4]
    list_elems[1] = [5, 6]

    assert pa.infer_type([ndarray_elems]) == nested_type

    converted = pa.array([ndarray_elems, list_elems])
    reference = pa.array(
        [[[1, 2], [2, 3]], [[3, 4], [5, 6]]],
        type=nested_type,
    )
    assert converted.equals(reference)

    # Length-1 arrays must be interpreted as sublists, not as scalars.
    singletons = np.empty(2, dtype=object)
    singletons[:] = [np.array([1]), np.array([2])]
    as_pylist = pa.array([singletons, singletons]).to_pylist()
    assert as_pylist == [[[1], [2]], [[1], [2]]]
Example #4
0
def test_list_scalar_device_construction_null(nesting_level):
    """Check that a null element of a nested list column reads back as NA.

    ``nesting_level`` sets how many list layers wrap the sample data used
    to infer the Arrow type.
    """
    # Start from one level of nesting and wrap once per additional level.
    nested = [[]]
    for _ in range(nesting_level - 1):
        nested = [nested]

    inferred_type = pa.infer_type(nested)
    null_array = pa.array([None], type=inferred_type)

    column = cudf.Series(null_array)._column
    scalar = get_element(column, 0)

    assert scalar.value is cudf.NA
def test_multidimensional_ndarray_as_nested_list():
    """Check that 2-D int64 ndarrays convert to lists of lists of int64."""
    # TODO(wesm): see ARROW-5645
    nested_type = pa.list_(pa.list_(pa.int64()))

    first = np.array([[1, 2], [2, 3]], dtype=np.int64)
    second = np.array([[3, 4], [5, 6]], dtype=np.int64)

    assert pa.infer_type([first]) == nested_type

    converted = pa.array([first, second])
    reference = pa.array(
        [[[1, 2], [2, 3]], [[3, 4], [5, 6]]],
        type=nested_type,
    )

    assert converted.equals(reference)