def test_isse_3237(self, duckdb_cursor):
    """Run a pandas mapper plus chained projections through a DuckDB relation.

    The mapper appends two datetime columns (``date`` shifted forward and
    backward by ``days_to_add``); the projections then derive ``one``,
    ``two`` and ``three`` from them via ``datediff``. The final assertion
    only checks that the original ``days_to_add`` column survives intact.
    """

    def add_shifted_dates(frame):
        # Pull both input columns out as plain numpy arrays first.
        base_dates = frame['date'].to_numpy("datetime64[us]")
        offsets = frame['days_to_add'].to_numpy("int")

        def shifted(sign):
            # sign=1 moves each date forward, sign=-1 moves it backward.
            values = []
            for stamp, offset in zip(base_dates, offsets):
                day = pd.to_datetime(stamp).date()
                values.append(day + timedelta(days=sign * offset.item()))
            return pd.Series(values, dtype='datetime64[us]')

        frame["result1"] = shifted(1)
        frame["result2"] = shifted(-1)
        return frame

    def process(rel):
        # Each projection keeps every existing column and appends one more.
        return (
            rel.map(add_shifted_dates)
            .project("*, datediff('day', date, result1) as one")
            .project("*, datediff('day', date, result2) as two")
            .project("*, IF(ABS(one) > ABS(two), one, two) as three")
        )

    start_dates = pd.Series(
        [date(2000, 1, 1), date(2000, 1, 2)],
        dtype="datetime64[us]",
    )
    source = pd.DataFrame({'date': start_dates, 'days_to_add': [1, 2]})

    relation = process(duckdb.from_df(source))
    fetched = relation.execute().fetchdf()
    assert fetched['days_to_add'].to_numpy()[0] == 1
def test_timestamp_timedelta(self, duckdb_cursor):
    """Check that Timedelta columns come back from DuckDB equal to the input.

    Covers second, microsecond and millisecond units, plus a column built
    from ``pd.Timedelta(None, unit='s')``.
    """
    columns = {
        'a': [pd.Timedelta(1, unit='s')],
        'b': [pd.Timedelta(None, unit='s')],
        'c': [pd.Timedelta(1, unit='us')],
        'd': [pd.Timedelta(1, unit='ms')],
    }
    expected = pd.DataFrame(columns)

    # Push the frame through a DuckDB relation and materialise it again.
    round_tripped = duckdb.from_df(expected).df()

    assert round_tripped.equals(expected)
def test_from_df(self, duckdb_cursor):
    """Join a DataFrame-backed relation against a table on one connection.

    Exercises both ``duckdb.df`` and ``duckdb.from_df`` with an explicit
    ``connection`` argument; in each case the relation is queried under the
    alias ``t_2`` and joined to table ``t``, which holds the single value 1.
    """
    conn = duckdb.connect()
    for statement in ("create table t (a integer)", "insert into t values (1)"):
        conn.execute(statement)

    frame = pd.DataFrame.from_dict({"i": [1, 2, 3, 4]})
    join_sql = 'select count(*) from t inner join t_2 on (a = i)'

    # Only i == 1 has a partner in t, so the join count must be exactly one.
    via_df = duckdb.df(frame, connection=conn)
    first_row = via_df.query('t_2', join_sql).fetchall()[0]
    assert first_row == (1,)

    via_from_df = duckdb.from_df(frame, connection=conn)
    first_row = via_from_df.query('t_2', join_sql).fetchall()[0]
    assert first_row == (1,)
def test_timestamp_types_roundtrip(self, duckdb_cursor):
    """Check that Timestamp columns built with each unit round-trip via DuckDB.

    One single-row column is created per unit keyword (s, ms, us, ns), each
    from its own ``datetime.datetime.now()`` call, and the frame returned by
    DuckDB must equal the input frame.
    """
    unit_by_column = {'a': 's', 'b': 'ms', 'c': 'us', 'd': 'ns'}
    data = {
        column: [pd.Timestamp(datetime.datetime.now(), unit=unit)]
        for column, unit in unit_by_column.items()
    }
    expected = pd.DataFrame(data=data)

    round_tripped = duckdb.from_df(expected).df()

    assert round_tripped.equals(expected)
def test_timestamp_nulls(self, duckdb_cursor):
    """Check that Timestamp columns built from ``None`` round-trip via DuckDB.

    One single-row column is created per unit keyword (s, ms, us, ns) from
    ``pd.Timestamp(None, unit=...)``; the frame returned by DuckDB must equal
    the input frame.
    """
    unit_by_column = {'a': 's', 'b': 'ms', 'c': 'us', 'd': 'ns'}
    data = {
        column: [pd.Timestamp(None, unit=unit)]
        for column, unit in unit_by_column.items()
    }
    expected = pd.DataFrame(data=data)

    round_tripped = duckdb.from_df(expected).df()

    assert round_tripped.equals(expected)
def from_df():
    """Return a DuckDB relation over a one-column frame of random floats.

    The frame has a single column ``x`` holding 1,000,000 values drawn from
    ``np.random.rand``.
    """
    samples = np.random.rand(1_000_000)
    frame = pd.DataFrame({"x": samples})
    relation = duckdb.from_df(frame)
    return relation