def test_self_join(table):
    """Join a table against a view of itself and check the result.

    Self-joins are awkward in this design because a column expression
    may refer to either the left or the right side of the join, e.g.::

        SELECT left.key, sum(left.value - right.value) as total_deltas
        FROM table left
        INNER JOIN table right
          ON left.current_period = right.previous_period + 1
        GROUP BY 1

    One workaround is to make the user prefix the joined fields and
    project using the prefixed names, which is not very satisfying.
    """
    lhs = table
    rhs = table.view()

    # Metric mixing columns from both sides of the join.
    delta = lhs['a'] - rhs['b']
    metric = delta.mean().name('metric')

    join_predicate = rhs['g'] == lhs['g']
    joined = lhs.inner_join(rhs, [join_predicate])

    # Basic check that there are no referential problems: both table
    # references must show up in the repr.
    rendered = repr(joined)
    for ref_name in ('ref_0', 'ref_1'):
        assert ref_name in rendered

    # The overlapping schemas prevent immediate materialization.
    with pytest.raises(RelationError):
        joined.materialize()

    # Projecting the left table yields exactly the left table's schema.
    left_only = joined[[lhs]]
    assert_equal(left_only.schema(), lhs.schema())

    # Aggregating on top of the join yields the group key plus the metric.
    result = joined.aggregate([metric], by=[lhs['g']])
    expected = api.Schema(['g', 'metric'], ['string', 'double'])
    assert_equal(result.schema(), expected)
def test_cross_join(table):
    """Cross-join a table with its own scalar aggregates.

    The materialized result is expected to carry the table's schema
    with the aggregate columns appended.
    """
    sum_a = table['a'].sum().name('sum_a')
    mean_b = table['b'].mean().name('mean_b')
    totals = table.aggregate([sum_a, mean_b])

    crossed = table.cross_join(totals)
    joined = crossed.materialize()

    # Expected: original columns first, then the two aggregate columns.
    totals_schema = api.Schema(['sum_a', 'mean_b'], ['int64', 'double'])
    expected = table.schema().append(totals_schema)
    assert_equal(joined.schema(), expected)
def test_equijoin_schema_merge():
    """Check the merged schema of an equijoin for several join types.

    For inner, left and outer joins of two tables with disjoint column
    names, the materialized schema is expected to be the left columns
    followed by the right columns.
    """
    left = ibis.table([('key1', 'string'), ('value1', 'double')])
    right = ibis.table([('key2', 'string'), ('stuff', 'int32')])

    key_match = left['key1'] == right['key2']

    expected = api.Schema(['key1', 'value1', 'key2', 'stuff'],
                          ['string', 'double', 'string', 'int32'])

    # Each join flavour is looked up by method name on the left table.
    for method_name in ('inner_join', 'left_join', 'outer_join'):
        joined = getattr(left, method_name)(right, [key_match]).materialize()
        assert_equal(joined.schema(), expected)