def test_window_bind_to_table(t):
    w = ibis.window(group_by="g", order_by=ibis.desc("f"))
    w2 = w.bind(t)
    expected = ibis.window(group_by=t.g, order_by=ibis.desc(t.f))

    assert_equal(w2, expected)
def test_window_bind_to_table(self):
    w = ibis.window(group_by='g', order_by=ibis.desc('f'))
    w2 = w.bind(self.t)
    expected = ibis.window(group_by=self.t.g, order_by=ibis.desc(self.t.f))

    assert_equal(w2, expected)
def test_window_bind_to_table(alltypes):
    t = alltypes
    w = ibis.window(group_by='g', order_by=ibis.desc('f'))
    w2 = w.bind(alltypes)
    expected = ibis.window(group_by=t.g, order_by=ibis.desc(t.f))

    assert_equal(w2, expected)
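# The three variants above all exercise the same behavior: string keys in
# ibis.window() are deferred and only resolve when the window is bound to a
# concrete table. A minimal standalone sketch of that pattern, assuming an
# ibis version (classic expression API) where Window.bind() is available;
# the unbound table and its column names are made up for illustration.
import ibis

t = ibis.table([('g', 'string'), ('f', 'double')], name='t')

w = ibis.window(group_by='g', order_by=ibis.desc('f'))
bound = w.bind(t)
# bound is now equivalent to:
#   ibis.window(group_by=t.g, order_by=ibis.desc(t.f))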
def test_sort_by_desc_deferred_sort_key(table):
    result = table.group_by('g').size().sort_by(ibis.desc('count'))

    tmp = table.group_by('g').size()
    expected = tmp.sort_by((tmp['count'], False))
    expected2 = tmp.sort_by(ibis.desc(tmp['count']))

    assert_equal(result, expected)
    assert_equal(result, expected2)
def test_sort_by2(table):
    m = table.mutate(foo=table.e + table.f)

    result = m.sort_by(lambda x: -x.foo)
    expected = m.sort_by(-m.foo)
    assert_equal(result, expected)

    result = m.sort_by(lambda x: ibis.desc(x.foo))
    expected = m.sort_by(ibis.desc('foo'))
    assert_equal(result, expected)

    result = m.sort_by(ibis.desc(lambda x: x.foo))
    expected = m.sort_by(ibis.desc('foo'))
    assert_equal(result, expected)
def test_sort_by(self):
    m = self.table.mutate(foo=self.table.e + self.table.f)

    result = m.sort_by(lambda x: -x.foo)
    expected = m.sort_by(-m.foo)
    assert_equal(result, expected)

    result = m.sort_by(lambda x: ibis.desc(x.foo))
    expected = m.sort_by(ibis.desc('foo'))
    assert_equal(result, expected)

    result = m.sort_by(ibis.desc(lambda x: x.foo))
    expected = m.sort_by(ibis.desc('foo'))
    assert_equal(result, expected)
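# As the two sort_by tests above assert, a descending sort key can be
# spelled three equivalent ways. A compact summary against an unbound
# table (a sketch; the table and column names are made up):
import ibis

t = ibis.table([('foo', 'double')], name='t')

by_negation = t.sort_by(-t.foo)                      # negate the column
by_name = t.sort_by(ibis.desc('foo'))                # deferred column name
by_callable = t.sort_by(lambda x: ibis.desc(x.foo))  # deferred callable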
def test_lower_projection_sort_key(self):
    expr = self._case_subquery_aliased()

    t3 = self._get_sqla('star1').alias('t3')
    t2 = self._get_sqla('star2').alias('t2')
    t4 = (
        sa.select([t3.c.foo_id, F.sum(t3.c.f).label('total')])
        .group_by(t3.c.foo_id)
        .alias('t4')
    )
    t1 = (
        sa.select([t4.c.foo_id, t4.c.total, t2.c.value1])
        .select_from(t4.join(t2, t4.c.foo_id == t2.c.foo_id))
        .alias('t1')
    )
    t0 = (
        sa.select([t1.c.foo_id, t1.c.total, t1.c.value1])
        .where(t1.c.total > L(100))
        .alias('t0')
    )
    expected = sa.select([t0.c.foo_id, t0.c.total, t0.c.value1]).order_by(
        t0.c.total.desc()
    )

    expr2 = expr[expr.total > 100].sort_by(ibis.desc('total'))
    self._compare_sqla(expr2, expected)
def tpc_h05(con, NAME="ASIA", DATE="1994-01-01"):
    customer = con.table("customer")
    orders = con.table("orders")
    lineitem = con.table("lineitem")
    supplier = con.table("supplier")
    nation = con.table("nation")
    region = con.table("region")

    q = customer
    q = q.join(orders, customer.c_custkey == orders.o_custkey)
    q = q.join(lineitem, lineitem.l_orderkey == orders.o_orderkey)
    q = q.join(supplier, lineitem.l_suppkey == supplier.s_suppkey)
    q = q.join(
        nation,
        (customer.c_nationkey == supplier.s_nationkey)
        & (supplier.s_nationkey == nation.n_nationkey),
    )
    q = q.join(region, nation.n_regionkey == region.r_regionkey)

    q = q.filter(
        [
            q.r_name == NAME,
            q.o_orderdate >= DATE,
            q.o_orderdate < add_date(DATE, dy=1),
        ]
    )
    revexpr = q.l_extendedprice * (1 - q.l_discount)
    gq = q.group_by([q.n_name])
    q = gq.aggregate(revenue=revexpr.sum())
    q = q.sort_by([ibis.desc(q.revenue)])
    return q
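# The TPC-H queries in this section call an add_date() helper that is not
# part of this excerpt. A plausible sketch of what it does, shifting an ISO
# date string by years (dy), months (dm), or days (dd) and returning a date
# literal for comparisons; this is a hypothetical reconstruction and the
# real helper may differ. Assumes python-dateutil is installed.
import datetime

from dateutil.relativedelta import relativedelta

import ibis


def add_date(datestr, dy=0, dm=0, dd=0):
    # Shift the date in plain Python, then wrap it as an ibis date literal.
    d = datetime.date.fromisoformat(datestr)
    d = d + relativedelta(years=dy, months=dm, days=dd)
    return ibis.literal(d, type='date')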
def log_scraper_last_complete(expr):
    last_completion = (
        expr.filter(expr.event.isin(['sql_execute', 'render_vega']))
        .sort_by(ibis.desc('logtime'))
        .select(['logtime', 'event', 'query', 'sequence', 'logfile'])
    )
    # print(last_completion.compile())
    return last_completion.execute(1)
def test_compound_expression(diamonds: ir.TableExpr) -> None:
    expected = diamonds[diamonds.price * diamonds.price / 2.0 >= 100]
    expected = expected.groupby('cut').aggregate(
        [
            expected.carat.max().name('max_carat'),
            expected.carat.mean().name('mean_carat'),
            expected.carat.min().name('min_carat'),
            expected.x.count().name('n'),
            expected.carat.std().name('std_carat'),
            expected.carat.sum().name('sum_carat'),
            expected.carat.var().name('var_carat'),
        ]
    )
    expected = expected.mutate(
        foo=expected.mean_carat, bar=expected.var_carat
    ).sort_by([ibis.desc('foo'), 'bar']).head()

    result = (
        diamonds
        >> sift(X.price * X.price / 2.0 >= 100)
        >> groupby(X.cut)
        >> summarize(
            max_carat=max(X.carat),
            mean_carat=mean(X.carat),
            min_carat=min(X.carat),
            n=n(X.x),
            std_carat=std(X.carat),
            sum_carat=sum(X.carat),
            var_carat=var(X.carat),
        )
        >> mutate(foo=X.mean_carat, bar=X.var_carat)
        >> sort_by(desc(X.foo), X.bar)
        >> head(5)
    )
    assert result.equals(expected)
    tm.assert_frame_equal(expected.execute(), result >> do())
def test_bug_duplicated_where(airlines):
    # GH #539
    table = airlines

    t = table['arrdelay', 'dest']
    expr = t.group_by('dest').mutate(
        dest_avg=t.arrdelay.mean(), dev=t.arrdelay - t.arrdelay.mean()
    )

    tmp1 = expr[expr.dev.notnull()]
    tmp2 = tmp1.sort_by(ibis.desc('dev'))
    worst = tmp2.limit(10)

    result = Compiler.to_sql(worst)
    expected = """\
SELECT *
FROM (
  SELECT t1.*
  FROM (
    SELECT *, avg(`arrdelay`) OVER (PARTITION BY `dest`) AS `dest_avg`,
           `arrdelay` - avg(`arrdelay`) OVER (PARTITION BY `dest`) AS `dev`
    FROM (
      SELECT `arrdelay`, `dest`
      FROM airlines
    ) t3
  ) t1
  WHERE t1.`dev` IS NOT NULL
) t0
ORDER BY `dev` DESC
LIMIT 10"""
    assert result == expected
def test_lower_projection_sort_key(self):
    expr = self._case_subquery_aliased()

    s1 = self._get_sqla('star1').alias('t2')
    s2 = self._get_sqla('star2').alias('t1')

    expr2 = expr[expr.total > 100].sort_by(ibis.desc('total'))

    agged = (
        sa.select([s1.c.foo_id, F.sum(s1.c.f).label('total')])
        .group_by(s1.c.foo_id)
        .alias('t3')
    )
    joined = agged.join(s2, agged.c.foo_id == s2.c.foo_id)
    expected = sa.select([agged, s2.c.value1]).select_from(joined)

    ex = expected.alias('t0')
    expected2 = (
        sa.select([ex])
        .where(ex.c.total > L(100))
        .order_by(ex.c.total.desc())
    )

    self._compare_sqla(expr2, expected2)
def tpc_h09(con, COLOR="green"):
    part = con.table("part")
    supplier = con.table("supplier")
    lineitem = con.table("lineitem")
    partsupp = con.table("partsupp")
    orders = con.table("orders")
    nation = con.table("nation")

    q = lineitem
    q = q.join(supplier, supplier.s_suppkey == lineitem.l_suppkey)
    q = q.join(
        partsupp,
        (partsupp.ps_suppkey == lineitem.l_suppkey)
        & (partsupp.ps_partkey == lineitem.l_partkey),
    )
    q = q.join(part, part.p_partkey == lineitem.l_partkey)
    q = q.join(orders, orders.o_orderkey == lineitem.l_orderkey)
    q = q.join(nation, supplier.s_nationkey == nation.n_nationkey)

    q = q[
        (
            q.l_extendedprice * (1 - q.l_discount)
            - q.ps_supplycost * q.l_quantity
        ).name("amount"),
        q.o_orderdate.year().cast("string").name("o_year"),
        q.n_name.name("nation"),
        q.p_name,
    ]

    q = q.filter([q.p_name.like("%" + COLOR + "%")])

    gq = q.group_by([q.nation, q.o_year])
    q = gq.aggregate(sum_profit=q.amount.sum())

    q = q.sort_by([q.nation, ibis.desc(q.o_year)])
    return q
def tpc_h10(con, DATE="1993-10-01"):
    customer = con.table("customer")
    orders = con.table("orders")
    lineitem = con.table("lineitem")
    nation = con.table("nation")

    q = customer
    q = q.join(orders, customer.c_custkey == orders.o_custkey)
    q = q.join(lineitem, lineitem.l_orderkey == orders.o_orderkey)
    q = q.join(nation, customer.c_nationkey == nation.n_nationkey)

    q = q.filter(
        [
            (q.o_orderdate >= DATE) & (q.o_orderdate < add_date(DATE, dm=3)),
            q.l_returnflag == "R",
        ]
    )

    gq = q.group_by(
        [
            q.c_custkey,
            q.c_name,
            q.c_acctbal,
            q.c_phone,
            q.n_name,
            q.c_address,
            q.c_comment,
        ]
    )
    q = gq.aggregate(revenue=(q.l_extendedprice * (1 - q.l_discount)).sum())

    q = q.sort_by(ibis.desc(q.revenue))
    return q.limit(20)
def great_vcs():
    con = activate_db()
    i = con.table('investments')
    c = con.table('companies')

    clean_name = i.investor_name.fillna('NO INVESTOR').name('investor_name')
    num_investments = c.permalink.nunique()
    exited = c.status.isin(['ipo', 'acquired']).ifelse(c.permalink, ibis.NA)
    num_exits = exited.nunique()

    stats = (
        c.left_join(i, c.permalink == i.company_permalink)
        .group_by(clean_name)
        .aggregate(num_investments=num_investments, num_exits=num_exits)
    )
    stats = stats.mutate(
        succ_rate=(stats.num_exits / stats.num_investments.cast('float'))
    )
    stats.limit(10)

    great_success = (
        stats[stats.num_investments > 100]
        .sort_by(ibis.desc('succ_rate'))
    )
    top50 = great_success.limit(50)
    top50_dataframe = top50.execute()
    top50_html = top50_dataframe.to_html()
    # print(top20_view)
    # print(type(top20_view))
    return top50_html
def test_sort_aggregation_translation_failure(self):
    # This works around a nuance with our choice to hackishly fuse SortBy
    # after Aggregate to produce a single select statement rather than an
    # inline view.
    t = self.alltypes

    agg = t.group_by('string_col').aggregate(
        t.double_col.max().name('foo')
    )
    expr = agg.sort_by(ibis.desc('foo'))

    sat = self.sa_alltypes.alias('t1')
    base = (
        sa.select(
            [sat.c.string_col, F.max(sat.c.double_col).label('foo')]
        ).group_by(sat.c.string_col)
    ).alias('t0')
    ex = (
        sa.select([base.c.string_col, base.c.foo])
        .select_from(base)
        .order_by(sa.desc('foo'))
    )

    self._compare_sqla(expr, ex)
def tpc_h21(con, NATION="SAUDI ARABIA"):
    """Suppliers Who Kept Orders Waiting Query (Q21)

    This query identifies certain suppliers who were not able to ship
    required parts in a timely manner."""
    supplier = con.table("supplier")
    lineitem = con.table("lineitem")
    orders = con.table("orders")
    nation = con.table("nation")

    L2 = lineitem.view()
    L3 = lineitem.view()

    q = supplier
    q = q.join(lineitem, supplier.s_suppkey == lineitem.l_suppkey)
    q = q.join(orders, orders.o_orderkey == lineitem.l_orderkey)
    q = q.join(nation, supplier.s_nationkey == nation.n_nationkey)
    q = q[
        q.l_orderkey.name("l1_orderkey"),
        q.o_orderstatus,
        q.l_receiptdate,
        q.l_commitdate,
        q.l_suppkey.name("l1_suppkey"),
        q.s_name,
        q.n_name,
    ]

    q = q.filter(
        [
            q.o_orderstatus == "F",
            q.l_receiptdate > q.l_commitdate,
            q.n_name == NATION,
            (
                (L2.l_orderkey == q.l1_orderkey)
                & (L2.l_suppkey != q.l1_suppkey)
            ).any(),
            ~(
                (
                    (L3.l_orderkey == q.l1_orderkey)
                    & (L3.l_suppkey != q.l1_suppkey)
                    & (L3.l_receiptdate > L3.l_commitdate)
                ).any()
            ),
        ]
    )

    gq = q.group_by([q.s_name])
    q = gq.aggregate(numwait=q.count())
    q = q.sort_by([ibis.desc(q.numwait), q.s_name])
    return q.limit(100)
def tpc_h16(
    con,
    BRAND="Brand#45",
    TYPE="MEDIUM POLISHED",
    SIZES=(49, 14, 23, 45, 19, 3, 36, 9),
):
    """Parts/Supplier Relationship Query (Q16)

    This query finds out how many suppliers can supply parts with given
    attributes. It might be used, for example, to determine whether there
    is a sufficient number of suppliers for heavily ordered parts."""
    # ibis.options.sql.default_limit = 100000
    partsupp = con.table("partsupp")
    part = con.table("part")
    supplier = con.table("supplier")

    q = partsupp.join(part, part.p_partkey == partsupp.ps_partkey)
    q = q.filter(
        [
            q.p_brand != BRAND,
            ~q.p_type.like(f"{TYPE}%"),
            q.p_size.isin(SIZES),
            ~q.ps_suppkey.isin(
                supplier.filter(
                    [supplier.s_comment.like("%Customer%Complaints%")]
                ).s_suppkey
            ),
        ]
    )

    gq = q.groupby([q.p_brand, q.p_type, q.p_size])
    q = gq.aggregate(supplier_cnt=q.ps_suppkey.nunique())
    q = q.sort_by([ibis.desc(q.supplier_cnt), q.p_brand, q.p_type, q.p_size])
    return q
def test_bfill(events):
    con = ibis.pandas.connect({"t": events})
    t = con.table("t")

    # Count non-null measurements from each row to the end of its event
    # (descending time with following=0), so rows that share the same next
    # non-null value get the same grouper.
    win = ibis.window(
        group_by=t.event_id, order_by=ibis.desc(t.measured_on), following=0
    )
    grouped = t.mutate(grouper=t.measurement.count().over(win))
    # Within each (event_id, grouper) group the max is that next non-null
    # measurement, i.e. a backfill.
    expr = (
        grouped.group_by([grouped.event_id, grouped.grouper])
        .mutate(bfill=grouped.measurement.max())
        .sort_by("measured_on")
    )
    result = expr.execute().reset_index(drop=True)

    expected_raw = """\
event_id measured_on  measurement  grouper  bfill
       2  2021-05-05         42.0        3   42.0
       2  2021-05-06         42.0        2   42.0
       2  2021-05-07          NaN        1   11.0
       2  2021-05-08         11.0        1   11.0
       2  2021-05-09          NaN        0    NaN
       2  2021-05-10          NaN        0    NaN
       1  2021-06-01          NaN        1    5.0
       1  2021-06-02          5.0        1    5.0
       1  2021-06-03          NaN        0    NaN
       1  2021-06-04          NaN        0    NaN
       3  2021-07-11          NaN        0    NaN
       3  2021-07-12          NaN        0    NaN"""
    expected = pd.read_csv(
        io.StringIO(expected_raw),
        sep=r"\s+",
        header=0,
        parse_dates=["measured_on"],
    )
    tm.assert_frame_equal(result, expected)
def test_lower_projection_sort_key(con, subquery_aliased, star1, star2):
    expr = subquery_aliased

    t3 = con.meta.tables["star1"].alias("t3")
    t2 = con.meta.tables["star2"].alias("t2")
    t4 = (
        sa.select([t3.c.foo_id, F.sum(t3.c.f).label('total')])
        .group_by(t3.c.foo_id)
        .alias('t4')
    )
    t1 = (
        sa.select([t4.c.foo_id, t4.c.total, t2.c.value1])
        .select_from(t4.join(t2, t4.c.foo_id == t2.c.foo_id))
        .alias('t1')
    )
    t0 = (
        sa.select([t1.c.foo_id, t1.c.total, t1.c.value1])
        .where(t1.c.total > L(100))
        .alias('t0')
    )
    expected = sa.select([t0.c.foo_id, t0.c.total, t0.c.value1]).order_by(
        t0.c.total.desc()
    )

    expr2 = expr[expr.total > 100].sort_by(ibis.desc('total'))
    _check(expr2, expected)
def log_scraper_incomplete_queries(expr):
    endings = expr.filter(
        expr.event.isin(['sql_execute', 'render_vega'])
    ).select(['sequence'])
    incomplete = expr.filter(
        expr.event.isin(['sql_execute_begin', 'render_vega_begin'])
        & expr.sequence.notin(endings.sequence)
    ).sort_by(ibis.desc('logtime'))
    return incomplete.select(['logtime', 'event', 'query', 'sequence'])
def test_compile_twice(dbpath):
    con1 = ibis.sqlite.connect(dbpath)
    t1 = con1.table('batting')
    sort_key1 = ibis.desc(t1.playerID)
    sorted_table1 = t1.sort_by(sort_key1)
    expr1 = sorted_table1.count()

    con2 = ibis.sqlite.connect(dbpath)
    t2 = con2.table('batting')
    sort_key2 = ibis.desc(t2.playerID)
    sorted_table2 = t2.sort_by(sort_key2)
    expr2 = sorted_table2.count()

    result1 = str(expr1.compile())
    result2 = str(expr2.compile())

    assert result1 == result2
def test_batting_most_hits(players, players_df):
    expr = players.mutate(
        hits_rank=lambda t: t.H.rank().over(
            ibis.cumulative_window(order_by=ibis.desc(t.H))
        )
    )
    result = expr.execute()
    hits_rank = players_df.groupby('playerID').H.rank(
        method='min', ascending=False
    )
    expected = players_df.assign(hits_rank=hits_rank)
    tm.assert_frame_equal(result[expected.columns], expected)
def test_order_by_desc(alltypes):
    t = alltypes

    w = window(order_by=ibis.desc(t.f))

    proj = t[t.f, ibis.row_number().over(w).name('revrank')]
    expected = """\
SELECT `f`, (row_number() OVER (ORDER BY `f` DESC) - 1) AS `revrank`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(proj, expected)

    expr = t.group_by('g').order_by(ibis.desc(t.f))[
        t.d.lag().name('foo'), t.a.max()
    ]
    expected = """\
SELECT lag(`d`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `foo`,
       max(`a`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `max`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(expr, expected)
def test_order_by_desc(self):
    t = self.con.table('alltypes')

    w = window(order_by=ibis.desc(t.f))

    proj = t[t.f, ibis.row_number().over(w).name('revrank')]
    expected = """\
SELECT f, row_number() OVER (ORDER BY f DESC) - 1 AS `revrank`
FROM alltypes"""
    self._check_sql(proj, expected)

    expr = t.group_by('g').order_by(ibis.desc(t.f))[
        t.d.lag().name('foo'), t.a.max()
    ]
    expected = """\
SELECT lag(d) OVER (PARTITION BY g ORDER BY f DESC) AS `foo`,
       max(a) OVER (PARTITION BY g ORDER BY f DESC) AS `max`
FROM alltypes"""
    self._check_sql(expr, expected)
def test_complex_sort_by(t, df):
    expr = t.sort_by(
        [ibis.desc(t.plain_int64 * t.plain_float64), t.plain_float64]
    )
    result = expr.execute()
    expected = (
        df.assign(foo=df.plain_int64 * df.plain_float64)
        .sort_values(['foo', 'plain_float64'], ascending=[False, True])
        .drop(['foo'], axis=1)
        .reset_index(drop=True)
    )
    tm.assert_frame_equal(result[expected.columns], expected)
def test_count_on_order_by(db):
    t = db.batting
    sort_key = ibis.desc(t.playerID)
    sorted_table = t.sort_by(sort_key)
    expr = sorted_table.count()
    result = str(
        expr.compile().compile(compile_kwargs={'literal_binds': True})
    )
    expected = (
        "SELECT count('*') AS count \n" 'FROM base.batting AS t0'
    )  # noqa: W291
    assert result == expected
def log_scraper_first_incomplete_before_restart(expr, limit=100):
    restart_times = expr.filter(
        (expr.severity == 'INFO') & (expr.msg.contains('OmniSci Server 5'))
    ).select(['logtime'])

    results = []
    for i, endtime in restart_times.execute(limit).iterrows():
        # ceil to avoid ibis warning and dropping microseconds
        endtime = endtime.logtime.ceil('s')
        last_complete_end = (
            expr.filter(
                (expr.logtime < endtime)
                & expr.event.isin(['sql_execute', 'render_vega'])
            )
            .sort_by(ibis.desc('logtime'))
            .limit(1)
            .select(['sequence'])
        )
        last_complete_start = (
            expr.filter(
                expr.event.isin(['sql_execute_begin', 'render_vega_begin'])
                & expr.sequence.isin(last_complete_end.sequence)
                & (expr.logtime < endtime)
            )
            .sort_by(ibis.desc('logtime'))
            .select(['logtime', 'sequence'])
        )
        for i, last_complete_start in last_complete_start.execute(
            1
        ).iterrows():
            # floor to avoid ibis warning and dropping microseconds
            last_complete_start_time = last_complete_start.logtime.floor('s')
            incomplete = (
                expr.filter(
                    (expr.logtime < endtime)
                    & (expr.logtime > last_complete_start_time)
                    & (expr.sequence != last_complete_start.sequence)
                    & expr.event.isin(
                        ['sql_execute_begin', 'render_vega_begin']
                    )
                )
                .sort_by('logtime')
                .select(['logtime', 'event', 'query', 'sequence', 'logfile'])
            )
            results.append(incomplete.execute(1))
    if results:
        df = pd.concat(results)
        df.drop_duplicates(inplace=True)
        return df
    else:
        return None
def test_count_on_order_by(db):
    t = db.batting
    sort_key = ibis.desc(t.playerID)
    sorted_table = t.sort_by(sort_key)
    expr = sorted_table.count()
    result = str(
        expr.compile().compile(compile_kwargs={'literal_binds': True})
    )
    expected = """\
SELECT count('*') AS count 
FROM "default".batting AS t0"""  # noqa: W291
    assert result == expected
def test_first_last_value(alltypes, df, func, expected_index):
    col = alltypes.sort_by(ibis.desc(alltypes.string_col)).double_col
    method = getattr(col, func)
    expr = method()
    result = expr.execute().rename('double_col')
    expected = pd.Series(
        df.double_col.iloc[expected_index],
        index=pd.RangeIndex(len(df)),
        name='double_col',
    )
    tm.assert_series_equal(result, expected)
def test_memoize_insert_sort_key(con):
    table = con.table('airlines')

    t = table['arrdelay', 'dest']
    expr = t.group_by('dest').mutate(
        dest_avg=t.arrdelay.mean(), dev=t.arrdelay - t.arrdelay.mean()
    )

    worst = expr[expr.dev.notnull()].sort_by(ibis.desc('dev')).limit(10)

    result = repr(worst)
    assert result.count('airlines') == 1
def test_count_on_order_by(con):
    t = con.table("batting")
    sort_key = ibis.desc(t.playerID)
    sorted_table = t.sort_by(sort_key)
    expr = sorted_table.count()
    result = str(
        expr.compile().compile(compile_kwargs={'literal_binds': True})
    )
    expected = (
        "SELECT count('*') AS count \nFROM main.batting AS t0"  # noqa: W291
    )
    assert result == expected
def test_memoize_insert_sort_key(self):
    table = self.con.table('airlines')

    t = table['arrdelay', 'dest']
    expr = t.group_by('dest').mutate(
        dest_avg=t.arrdelay.mean(), dev=t.arrdelay - t.arrdelay.mean()
    )

    worst = expr[expr.dev.notnull()].sort_by(ibis.desc('dev')).limit(10)

    result = repr(worst)
    assert result.count('airlines') == 1
def test_sort_aggregation_translation_failure(self):
    # This works around a nuance with our choice to hackishly fuse SortBy
    # after Aggregate to produce a single select statement rather than an
    # inline view.
    t = self.alltypes
    sat = self.sa_alltypes.alias("t0")

    agg = t.group_by("string_col").aggregate(
        t.double_col.max().name("foo")
    )
    expr = agg.sort_by(ibis.desc("foo"))

    ex = (
        sa.select([sat.c.string_col, F.max(sat.c.double_col).label("foo")])
        .group_by(sat.c.string_col)
        .order_by(sa.desc("foo"))
    )

    self._compare_sqla(expr, ex)
@pytest.mark.xfail(
    raises=AttributeError, reason='TableColumn does not implement limit'
)
@pytest.mark.parametrize('offset', [0, 2])
def test_series_limit(t, df, offset):
    n = 5
    s_expr = t.plain_int64.limit(n, offset=offset)
    result = s_expr.execute()
    tm.assert_series_equal(result, df.plain_int64.iloc[offset : offset + n])


@pytest.mark.parametrize(
    ('key', 'pandas_by', 'pandas_ascending'),
    [
        (lambda t, col: [ibis.desc(t[col])], lambda col: [col], False),
        (
            lambda t, col: [t[col], ibis.desc(t.plain_int64)],
            lambda col: [col, 'plain_int64'],
            [True, False],
        ),
        (
            lambda t, col: [ibis.desc(t.plain_int64 * 2)],
            lambda col: ['plain_int64'],
            False,
        ),
    ],
)
@pytest.mark.parametrize(
    'column',
    ['plain_datetimes_naive', 'plain_datetimes_ny', 'plain_datetimes_utc'],
)
has_answer_boolean = projection.answer_count > 0
# [END bigquery_ibis_transform_integer]

# [START bigquery_ibis_transform_boolean]
has_answer_int = has_answer_boolean.ifelse(1, 0)
# [END bigquery_ibis_transform_boolean]

# [START bigquery_ibis_aggregate]
total_questions = projection.count()
percentage_answered = has_answer_int.mean() * 100
# [END bigquery_ibis_aggregate]

# [START bigquery_ibis_group_by]
expression = projection.groupby('year').aggregate(
    total_questions=total_questions,
    percentage_answered=percentage_answered,
).sort_by(ibis.desc(projection.year))
# [END bigquery_ibis_group_by]

print('\nExecuting query:')
# [START bigquery_ibis_execute]
print(expression.execute())
#    year  total_questions  percentage_answered
# 0  2018           997508            66.776307
# 1  2017          2318405            75.898732
# 2  2016          2226478            84.193197
# 3  2015          2219791            86.170365
# 4  2014          2164895            88.356987
# 5  2013          2060753            91.533241
# 6  2012          1645498            94.510659
# 7  2011          1200601            97.149261
# 8  2010           694410            99.060497