def test_integer_with_nulls(self):
    """Integer columns with nulls must convert to float64 with NaN.

    For each integer dtype code, random values and a shared null mask are
    converted with ``A.from_pandas_series``, assembled into an ``A.Table``
    and converted back with ``to_pandas``. The result is compared against
    a frame holding the same values as float64, with NaN at every masked
    position.
    """
    # pandas requires upcast to float dtype
    int_dtypes = ['i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8']
    num_values = 100

    # Roughly 30% of the positions are flagged as null.
    null_mask = np.random.randint(0, 10, size=num_values) < 3

    expected_cols = []
    arrays = []
    for name in int_dtypes:
        # Cast to the dtype under test so that every width/signedness in
        # ``int_dtypes`` is really exercised; without the cast each column
        # would use randint's default integer dtype. Values lie in
        # [0, 100), so they fit even in 'i1'.
        values = np.random.randint(0, 100, size=num_values).astype(name)

        arr = A.from_pandas_series(values, null_mask)
        arrays.append(arr)

        # Expected pandas output: same numbers as float64, NaN for nulls.
        expected = values.astype('f8')
        expected[null_mask] = np.nan
        expected_cols.append(expected)

    ex_frame = pd.DataFrame(dict(zip(int_dtypes, expected_cols)),
                            columns=int_dtypes)

    table = A.Table.from_arrays(int_dtypes, arrays)
    result = table.to_pandas()

    tm.assert_frame_equal(result, ex_frame)
def test_integer_with_nulls(self):
    """Integer columns with nulls must convert to float64 with NaN.

    For each integer dtype code, random values and a shared null mask are
    converted with ``A.from_pandas_series``, assembled into an ``A.Table``
    and converted back with ``to_pandas``. The result is compared against
    a frame holding the same values as float64, with NaN at every masked
    position.
    """
    # pandas requires upcast to float dtype
    int_dtypes = ['i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8']
    num_values = 100

    # Roughly 30% of the positions are flagged as null.
    null_mask = np.random.randint(0, 10, size=num_values) < 3

    expected_cols = []
    arrays = []
    for name in int_dtypes:
        # Cast to the dtype under test so that every width/signedness in
        # ``int_dtypes`` is really exercised; without the cast each column
        # would use randint's default integer dtype. Values lie in
        # [0, 100), so they fit even in 'i1'.
        values = np.random.randint(0, 100, size=num_values).astype(name)

        arr = A.from_pandas_series(values, null_mask)
        arrays.append(arr)

        # Expected pandas output: same numbers as float64, NaN for nulls.
        expected = values.astype('f8')
        expected[null_mask] = np.nan
        expected_cols.append(expected)

    ex_frame = pd.DataFrame(dict(zip(int_dtypes, expected_cols)),
                            columns=int_dtypes)

    table = A.Table.from_arrays(arrays, int_dtypes)
    result = table.to_pandas()

    tm.assert_frame_equal(result, ex_frame)
def test_float_nulls(self):
    """Round-trip float32/float64 columns that contain nulls.

    Random data for each float dtype is converted together with a shared
    null mask, the resulting table's schema is checked against the
    expected fields, and ``to_pandas`` output is compared with the input
    data where masked positions have been replaced by NaN.
    """
    n = 100
    # Roughly 30% of the positions are flagged as null.
    is_null = np.random.randint(0, 10, size=n) < 3

    type_pairs = [('f4', A.float_()), ('f8', A.double())]
    col_names = ['f4', 'f8']

    converted = []
    want_fields = []
    want_columns = []
    for np_code, arrow_type in type_pairs:
        data = np.random.randn(n).astype(np_code)

        # Convert before the NaN fill below overwrites the masked slots.
        converted.append(A.from_pandas_series(data, is_null))
        want_fields.append(A.Field.from_py(np_code, arrow_type))

        data[is_null] = np.nan
        want_columns.append(data)

    want_frame = pd.DataFrame(
        {key: col for key, col in zip(col_names, want_columns)},
        columns=col_names,
    )

    table = A.Table.from_arrays(converted, col_names)
    want_schema = A.Schema.from_fields(want_fields)
    assert table.schema.equals(want_schema)

    got_frame = table.to_pandas()
    tm.assert_frame_equal(got_frame, want_frame)
def test_float_nulls(self):
    """Round-trip float32/float64 columns that contain nulls.

    Each float dtype gets its own random column; all columns share one
    null mask. The table schema must equal the schema built from the
    expected fields, and the frame returned by ``to_pandas`` must match
    the inputs with NaN written at the masked positions.
    """
    sample_count = 100
    # Roughly 30% of the positions are flagged as null.
    missing = np.random.randint(0, 10, size=sample_count) < 3

    dtype_specs = [('f4', A.float_()), ('f8', A.double())]
    labels = ['f4', 'f8']

    arrow_arrays, schema_fields, nan_filled = [], [], []
    for code, arrow_type in dtype_specs:
        raw = np.random.randn(sample_count).astype(code)

        # The conversion has to see the data before NaNs are written in.
        arrow_arrays.append(A.from_pandas_series(raw, missing))
        schema_fields.append(A.Field.from_py(code, arrow_type))

        raw[missing] = np.nan
        nan_filled.append(raw)

    by_label = dict(zip(labels, nan_filled))
    reference = pd.DataFrame(by_label, columns=labels)

    table = A.Table.from_arrays(labels, arrow_arrays)
    assert table.schema.equals(A.Schema.from_fields(schema_fields))

    tm.assert_frame_equal(table.to_pandas(), reference)
def test_boolean_nulls(self):
    """Boolean column with nulls must convert to an object column.

    A seeded random boolean array and null mask are converted with
    ``A.from_pandas_series`` and wrapped in a single-column table. The
    ``to_pandas`` result is compared with the same booleans cast to
    object, with ``None`` at every masked position.
    """
    # pandas requires upcast to object dtype
    count = 100
    np.random.seed(0)

    # Mask is drawn first, then the values, from the seeded generator.
    null_positions = np.random.randint(0, 10, size=count) < 3
    flags = np.random.randint(0, 10, size=count) < 5

    bool_array = A.from_pandas_series(flags, null_positions)

    as_objects = flags.astype(object)
    as_objects[null_positions] = None
    wanted = pd.DataFrame({'bools': as_objects})

    actual = A.Table.from_arrays(['bools'], [bool_array]).to_pandas()

    tm.assert_frame_equal(actual, wanted)
def test_boolean_nulls(self):
    """Boolean column with nulls: check schema and object-dtype output.

    A seeded random boolean array and null mask are converted with
    ``A.from_pandas_series`` and wrapped in a single-column table. The
    table schema must equal a one-field boolean schema, and ``to_pandas``
    must return the booleans as objects with ``None`` at masked positions.
    """
    # pandas requires upcast to object dtype
    size = 100
    np.random.seed(0)

    # Mask is drawn first, then the values, from the seeded generator.
    is_missing = np.random.randint(0, 10, size=size) < 3
    truth = np.random.randint(0, 10, size=size) < 5

    converted = A.from_pandas_series(truth, is_missing)

    with_none = truth.astype(object)
    with_none[is_missing] = None

    bool_field = A.Field.from_py('bools', A.bool_())
    want_schema = A.Schema.from_fields([bool_field])
    want_frame = pd.DataFrame({'bools': with_none})

    table = A.Table.from_arrays([converted], ['bools'])
    assert table.schema.equals(want_schema)

    tm.assert_frame_equal(table.to_pandas(), want_frame)
def test_float_nulls(self):
    """Round-trip float columns with nulls and compare the pandas output.

    For each float dtype code, random data is converted with a shared
    null mask, the arrays are assembled into a table, and ``to_pandas``
    is compared with the input data where masked positions hold NaN.
    """
    length = 100
    # Roughly 30% of the positions are flagged as null.
    masked = np.random.randint(0, 10, size=length) < 3

    float_codes = ['f4', 'f8']

    columns_in = []
    columns_expected = []
    for code in float_codes:
        samples = np.random.randn(length).astype(code)

        # Convert first; the NaN fill below mutates ``samples`` in place.
        columns_in.append(A.from_pandas_series(samples, masked))

        samples[masked] = np.nan
        columns_expected.append(samples)

    expected_by_code = dict(zip(float_codes, columns_expected))
    expected_frame = pd.DataFrame(expected_by_code, columns=float_codes)

    roundtripped = A.Table.from_arrays(float_codes, columns_in).to_pandas()

    tm.assert_frame_equal(roundtripped, expected_frame)