Beispiel #1
0
    def test_isse_3237(self, duckdb_cursor):
        """Run a pandas mapper over a relation and stack projections on top.

        The mapper adds ``result1`` and ``result2`` (``date`` moved forwards
        and backwards by ``days_to_add`` days).  Three successive ``project``
        calls then derive the columns ``one``, ``two`` and ``three``.  The
        only assertion is that the first ``days_to_add`` value of the fetched
        frame is still 1.
        """
        # (column to create, sign applied to days_to_add)
        shifts = (("result1", 1), ("result2", -1))
        projections = (
            "*, datediff('day', date, result1) as one",
            "*, datediff('day', date, result2) as two",
            "*, IF(ABS(one) > ABS(two), one, two) as three",
        )

        def mapper(x):
            # Snapshot both input columns before any new column is added.
            base_dates = x['date'].to_numpy("datetime64[us]")
            offsets = x['days_to_add'].to_numpy("int")
            for column, sign in shifts:
                shifted = [
                    pd.to_datetime(stamp).date()
                    + timedelta(days=sign * delta.item())
                    for stamp, delta in zip(base_dates, offsets)
                ]
                x[column] = pd.Series(shifted, dtype='datetime64[us]')
            return x

        def process(rel):
            rel = rel.map(mapper)
            # Each projection builds on the columns of the previous one.
            for expression in projections:
                rel = rel.project(expression)
            return rel

        start_dates = pd.Series([date(2000, 1, 1), date(2000, 1, 2)],
                                dtype="datetime64[us]")
        df = pd.DataFrame({'date': start_dates, 'days_to_add': [1, 2]})

        fetched = process(duckdb.from_df(df)).execute().fetchdf()
        first_days_to_add = fetched['days_to_add'].to_numpy()[0]
        assert first_days_to_add == 1
Beispiel #2
0
 def test_timestamp_timedelta(self, duckdb_cursor):
     """Round-trip a frame of ``pd.Timedelta`` values through DuckDB.

     Columns hold one value each, built with second, microsecond and
     millisecond units; column ``b`` is built from ``None``.  The frame
     returned by DuckDB must equal the input frame.
     """
     # (column name, value, unit) for each single-row column
     specs = (
         ('a', 1, 's'),
         ('b', None, 's'),
         ('c', 1, 'us'),
         ('d', 1, 'ms'),
     )
     expected = pd.DataFrame({
         name: [pd.Timedelta(value, unit=unit)]
         for name, value, unit in specs
     })
     round_tripped = duckdb.from_df(expected).df()
     assert round_tripped.equals(expected)
 def test_from_df(self, duckdb_cursor):
     """Join a DataFrame-backed relation against a table on one connection.

     Table ``t`` holds the single value 1 and the DataFrame column ``i``
     holds 1..4.  For relations created with both ``duckdb.df`` and
     ``duckdb.from_df``, the inner-join count query must return ``(1,)``
     as its first row.
     """
     conn = duckdb.connect()
     for statement in ("create table t (a integer)",
                       "insert into t values (1)"):
         conn.execute(statement)

     test_df = pd.DataFrame.from_dict({"i": [1, 2, 3, 4]})
     join_count_sql = 'select count(*) from t inner join t_2 on (a = i)'

     # Both entry points are checked with the same query, in this order.
     for make_relation in (duckdb.df, duckdb.from_df):
         rel = make_relation(test_df, connection=conn)
         first_row = rel.query('t_2', join_count_sql).fetchall()[0]
         assert first_row == (1,)
Beispiel #4
0
 def test_timestamp_types_roundtrip(self, duckdb_cursor):
     """Round-trip a frame of ``pd.Timestamp`` values through DuckDB.

     Columns ``a``..``d`` each hold one timestamp built from the current
     time with ``unit`` set to ``s``, ``ms``, ``us`` and ``ns``
     respectively.  The frame returned by DuckDB must equal the input.
     """
     units_by_column = (('a', 's'), ('b', 'ms'), ('c', 'us'), ('d', 'ns'))
     # now() is evaluated once per column, in column order.
     data = {
         column: [pd.Timestamp(datetime.datetime.now(), unit=unit)]
         for column, unit in units_by_column
     }
     expected = pd.DataFrame(data=data)
     round_tripped = duckdb.from_df(expected).df()
     assert round_tripped.equals(expected)
Beispiel #5
0
 def test_timestamp_nulls(self, duckdb_cursor):
     """Round-trip a frame of ``pd.Timestamp(None, ...)`` values through DuckDB.

     Columns ``a``..``d`` each hold one timestamp built from ``None`` with
     ``unit`` set to ``s``, ``ms``, ``us`` and ``ns`` respectively.  The
     frame returned by DuckDB must equal the input.
     """
     units_by_column = (('a', 's'), ('b', 'ms'), ('c', 'us'), ('d', 'ns'))
     data = {
         column: [pd.Timestamp(None, unit=unit)]
         for column, unit in units_by_column
     }
     expected = pd.DataFrame(data=data)
     round_tripped = duckdb.from_df(expected).df()
     assert round_tripped.equals(expected)
Beispiel #6
0
def from_df():
    """Return ``duckdb.from_df`` applied to a frame of random floats.

    The frame has a single column ``x`` filled with 1,000,000 values drawn
    by ``np.random.rand``.
    """
    samples = np.random.rand(1_000_000)
    frame = pd.DataFrame({"x": samples})
    return duckdb.from_df(frame)