def _preprocess_host_value(self, value, dtype):
    """Normalize a host ``value`` and optional ``dtype`` into a pair.

    Parameters
    ----------
    value
        The host-side value to wrap. Lists and dicts are returned
        unchanged; everything else goes through
        ``to_cudf_compatible_scalar``.
    dtype
        The requested dtype, or ``None`` to infer one from ``value``.

    Returns
    -------
    tuple
        ``(value, dtype)``. The value is ``NA`` when the input was a
        null host scalar, or when ``None``/``NA`` was given together
        with a ``ListDtype`` or ``StructDtype``.

    Raises
    ------
    TypeError
        If a list is given together with an explicit dtype, if a null
        value is given with no dtype, or if a generic-unit
        datetime/timedelta null is given with no dtype.
    ValueError
        If a non-null, non-list value is paired with a ``ListDtype``,
        or a non-null, non-dict value is paired with a ``StructDtype``.
    """
    # Nullness is decided on the raw input, before any conversion below.
    is_valid = not cudf._lib.scalar._is_null_host_scalar(value)

    # List values: the dtype is always inferred, never supplied.
    if isinstance(value, list):
        if dtype is not None:
            raise TypeError("Lists may not be cast to a different dtype")
        inferred = pa.infer_type([value], from_pandas=True)
        return value, ListDtype.from_arrow(inferred)
    if isinstance(dtype, ListDtype):
        # Only a null may be paired with a list dtype here.
        if value not in {None, NA}:
            raise ValueError(f"Can not coerce {value} to ListDtype")
        return NA, dtype

    # Dict values: infer a struct dtype only when none was supplied.
    if isinstance(value, dict):
        if dtype is None:
            inferred = pa.infer_type([value], from_pandas=True)
            dtype = StructDtype.from_arrow(inferred)
        return value, dtype
    if isinstance(dtype, StructDtype):
        # Only a null may be paired with a struct dtype here.
        if value not in {None, NA}:
            raise ValueError(f"Can not coerce {value} to StructDType")
        return NA, dtype

    # Decimals: round-trip through an arrow scalar of the requested
    # precision/scale, or derive the dtype from the Decimal itself.
    if isinstance(dtype, Decimal64Dtype):
        arrow_decimal = pa.decimal128(dtype.precision, dtype.scale)
        value = pa.scalar(value, type=arrow_decimal).as_py()
    if dtype is None and isinstance(value, decimal.Decimal):
        dtype = Decimal64Dtype._from_decimal(value)

    value = to_cudf_compatible_scalar(value, dtype=dtype)

    if dtype is None:
        if is_valid:
            dtype = value.dtype
        elif not isinstance(value, (np.datetime64, np.timedelta64)):
            raise TypeError(
                "dtype required when constructing a null scalar")
        else:
            # A null datetime/timedelta still carries a dtype, unless
            # its unit is the generic one.
            unit, _ = np.datetime_data(value)
            if unit == "generic":
                raise TypeError(
                    "Cant convert generic NaT to null scalar")
            dtype = value.dtype

    if not isinstance(dtype, Decimal64Dtype):
        dtype = cudf.dtype(dtype)

    return (value if is_valid else NA), dtype
def test_infer_type_masked():
    """Check ``pa.infer_type`` results when a ``mask`` is supplied."""
    # ARROW-5208
    values = [u'foo', u'bar', None, 2]

    # Only the trailing integer is masked, so utf8 is expected.
    last_masked = [False, False, False, True]
    assert pa.infer_type(values, mask=last_masked) == pa.utf8()

    # all masked
    everything_masked = np.array([True, True, True, True])
    assert pa.infer_type(values, mask=everything_masked) == pa.null()

    # length 0
    inferred_empty = pa.infer_type([], mask=[])
    assert inferred_empty == pa.null()
def test_nested_ndarray_in_object_array():
    """Check conversion of object arrays holding ndarrays or lists.

    Object-dtype arrays whose elements are int64 ndarrays (or plain
    lists) are expected to convert to ``list<list<int64>>``.
    """
    # ARROW-4350
    nested_type = pa.list_(pa.list_(pa.int64()))

    of_ndarrays = np.empty(2, dtype=object)
    of_ndarrays[:] = [
        np.array([1, 2], dtype=np.int64),
        np.array([2, 3], dtype=np.int64),
    ]

    of_lists = np.empty(2, dtype=object)
    of_lists[0] = [3, 4]
    of_lists[1] = [5, 6]

    assert pa.infer_type([of_ndarrays]) == nested_type

    converted = pa.array([of_ndarrays, of_lists])
    wanted = pa.array(
        [[[1, 2], [2, 3]], [[3, 4], [5, 6]]],
        type=nested_type,
    )
    assert converted.equals(wanted)

    # test case for len-1 arrays to ensure they are interpreted as
    # sublists and not scalars
    singletons = np.empty(2, dtype=object)
    singletons[:] = [np.array([1]), np.array([2])]
    as_pylist = pa.array([singletons, singletons]).to_pylist()
    assert as_pylist == [[[1], [2]], [[1], [2]]]
def test_list_scalar_device_construction_null(nesting_level):
    """Check that a null element of a list column reads back as ``cudf.NA``.

    The list type is inferred from an empty list wrapped
    ``nesting_level - 1`` additional times.
    """
    nested = [[]]
    for _ in range(nesting_level - 1):
        nested = [nested]

    # Single-row arrow array whose only entry is null.
    null_list = pa.array([None], type=pa.infer_type(nested))
    column = cudf.Series(null_list)._column

    assert get_element(column, 0).value is cudf.NA
def test_multidimensional_ndarray_as_nested_list():
    """Check that 2-D int64 ndarrays convert to ``list<list<int64>>``."""
    # TODO(wesm): see ARROW-5645
    nested_type = pa.list_(pa.list_(pa.int64()))

    first = np.array([[1, 2], [2, 3]], dtype=np.int64)
    second = np.array([[3, 4], [5, 6]], dtype=np.int64)

    assert pa.infer_type([first]) == nested_type

    converted = pa.array([first, second])
    wanted = pa.array(
        [[[1, 2], [2, 3]], [[3, 4], [5, 6]]],
        type=nested_type,
    )
    assert converted.equals(wanted)