def test_float_nulls(self): num_values = 100 null_mask = np.random.randint(0, 10, size=num_values) < 3 dtypes = [('f4', A.float_()), ('f8', A.double())] names = ['f4', 'f8'] expected_cols = [] arrays = [] fields = [] for name, arrow_dtype in dtypes: values = np.random.randn(num_values).astype(name) arr = A.from_pandas_series(values, null_mask) arrays.append(arr) fields.append(A.Field.from_py(name, arrow_dtype)) values[null_mask] = np.nan expected_cols.append(values) ex_frame = pd.DataFrame(dict(zip(names, expected_cols)), columns=names) table = A.Table.from_arrays(arrays, names) assert table.schema.equals(A.Schema.from_fields(fields)) result = table.to_pandas() tm.assert_frame_equal(result, ex_frame)
def test_float_nulls(self): num_values = 100 null_mask = np.random.randint(0, 10, size=num_values) < 3 dtypes = [('f4', A.float_()), ('f8', A.double())] names = ['f4', 'f8'] expected_cols = [] arrays = [] fields = [] for name, arrow_dtype in dtypes: values = np.random.randn(num_values).astype(name) arr = A.from_pandas_series(values, null_mask) arrays.append(arr) fields.append(A.Field.from_py(name, arrow_dtype)) values[null_mask] = np.nan expected_cols.append(values) ex_frame = pd.DataFrame(dict(zip(names, expected_cols)), columns=names) table = A.Table.from_arrays(names, arrays) assert table.schema.equals(A.Schema.from_fields(fields)) result = table.to_pandas() tm.assert_frame_equal(result, ex_frame)
def dataframe_with_arrays(): """ Dataframe with numpy arrays columns of every possible primtive type. Returns ------- df: pandas.DataFrame schema: pyarrow.Schema Arrow schema definition that is in line with the constructed df. """ dtypes = [('i1', pa.int8()), ('i2', pa.int16()), ('i4', pa.int32()), ('i8', pa.int64()), ('u1', pa.uint8()), ('u2', pa.uint16()), ('u4', pa.uint32()), ('u8', pa.uint64()), ('f4', pa.float_()), ('f8', pa.double())] arrays = OrderedDict() fields = [] for dtype, arrow_dtype in dtypes: fields.append(pa.field(dtype, pa.list_(arrow_dtype))) arrays[dtype] = [ np.arange(10, dtype=dtype), np.arange(5, dtype=dtype), None, np.arange(1, dtype=dtype) ] fields.append(pa.field('str', pa.list_(pa.string()))) arrays['str'] = [ np.array([u"1", u"ä"], dtype="object"), None, np.array([u"1"], dtype="object"), np.array([u"1", u"2", u"3"], dtype="object") ] fields.append(pa.field('datetime64', pa.list_(pa.timestamp('ms')))) arrays['datetime64'] = [ np.array(['2007-07-13T01:23:34.123456789', None, '2010-08-13T05:46:57.437699912'], dtype='datetime64[ms]'), None, None, np.array(['2007-07-13T02', None, '2010-08-13T05:46:57.437699912'], dtype='datetime64[ms]'), ] df = pd.DataFrame(arrays) schema = pa.Schema.from_fields(fields) return df, schema
def test_float_no_nulls(self): data = {} fields = [] dtypes = [('f4', A.float_()), ('f8', A.double())] num_values = 100 for numpy_dtype, arrow_dtype in dtypes: values = np.random.randn(num_values) data[numpy_dtype] = values.astype(numpy_dtype) fields.append(A.Field.from_py(numpy_dtype, arrow_dtype)) df = pd.DataFrame(data) schema = A.Schema.from_fields(fields) self._check_pandas_roundtrip(df, expected_schema=schema)