def test_apply_set_filter_for_metric_in_primary_dataset_query(self):
    # A ResultSet filter on the "votes" metric is expected to appear as a
    # CASE-based set column in the first sub-query ("sq0") only; the second
    # sub-query ("sq1") carries just the timestamp and the spend metric.
    builder = mock_dataset_blender.query()
    builder = builder.widget(
        f.ReactTable(mock_dataset_blender.fields["candidate-spend-per-wins"])
    )
    builder = builder.dimension(f.day(mock_dataset_blender.fields.timestamp))
    builder = builder.filter(
        f.ResultSet(mock_dataset_blender.fields["votes"].gt(10))
    )
    sql_queries = builder.sql

    self.assertEqual(len(sql_queries), 1)

    # Fragments that occur once per sub-query in the expected statement.
    day_column = 'TRUNC("timestamp",\'DD\') "$timestamp",'
    group_by_day = 'GROUP BY "$timestamp"'
    expected_sql = "".join(
        (
            "SELECT ",
            '"sq0"."$timestamp" "$timestamp",',
            '"sq0"."$set(SUM(votes)>10)" "$set(SUM(votes)>10)",',
            '"sq1"."$candidate-spend"/"sq0"."$wins" "$candidate-spend-per-wins" ',
            "FROM (",
            "SELECT ",
            day_column,
            'CASE WHEN SUM("votes")>10 THEN \'set(SUM(votes)>10)\' ELSE \'complement(SUM(votes)>10)\' END "$set(SUM(votes)>10)",',
            'SUM("is_winner") "$wins" ',
            'FROM "politics"."politician" ',
            group_by_day,
            ') "sq0" ',
            "LEFT JOIN (",
            "SELECT ",
            day_column,
            'SUM("candidate_spend") "$candidate-spend" ',
            'FROM "politics"."politician_spend" ',
            group_by_day,
            ') "sq1" ',
            "ON ",
            '"sq0"."$timestamp"="sq1"."$timestamp" ',
            'ORDER BY "$timestamp" ',
            "LIMIT 200000",
        )
    )
    self.assertEqual(expected_sql, str(sql_queries[0]))
def test_dimension_is_replaced_by_default_in_the_target_dimension_place_when_result_set_filter_is_present(
    self,
):
    # "text" is selected between "date" and "boolean"; the expected SQL keeps
    # the set CASE expression in that same middle slot under the "$text" alias.
    builder = ds.query.widget(f.Pandas(ds.fields.aggr_number))
    builder = builder.dimension(ds.fields.date)
    builder = builder.dimension(ds.fields.text)
    builder = builder.dimension(ds.fields.boolean)
    builder = builder.filter(f.ResultSet(ds.fields.text == "abc"))
    sql_queries = builder.sql

    self.assertEqual(len(sql_queries), 1)

    # GROUP BY and ORDER BY list the same aliases in the same order.
    dimension_aliases = '"$date","$text","$boolean" '
    expected_sql = "".join(
        (
            "SELECT ",
            '"date" "$date",',
            "CASE WHEN \"text\"='abc' THEN 'set(text=''abc'')' ELSE 'complement(text=''abc'')' END \"$text\",",
            '"boolean" "$boolean",',
            'SUM("number") "$aggr_number" ',
            'FROM "test" ',
            "GROUP BY ",
            dimension_aliases,
            "ORDER BY ",
            dimension_aliases,
            "LIMIT 200000",
        )
    )
    self.assertEqual(expected_sql, str(sql_queries[0]))
def test_dimension_breaks_complement_down_even_when_both_labels_are_set_but_wont_group_complement(
    self,
):
    # With will_group_complement=False the expected ELSE branch is the raw
    # "text" column, not the 'OTHERS' complement label that was supplied.
    label_options = {
        "set_label": "IS ABC",
        "complement_label": "OTHERS",
        "will_group_complement": False,
    }

    builder = ds.query.widget(f.Pandas(ds.fields.aggr_number))
    builder = builder.dimension(ds.fields.text)
    builder = builder.filter(f.ResultSet(ds.fields.text == "abc", **label_options))
    sql_queries = builder.sql

    self.assertEqual(len(sql_queries), 1)

    expected_sql = (
        "SELECT "
        "CASE WHEN \"text\"='abc' THEN 'IS ABC' ELSE \"text\" END \"$text\","
        'SUM("number") "$aggr_number" '
        'FROM "test" '
        'GROUP BY "$text" '
        'ORDER BY "$text" '
        'LIMIT 200000'
    )
    self.assertEqual(expected_sql, str(sql_queries[0]))
def test_dimension_is_inserted_before_conditional_dimension_when_result_set_filter_wont_ignore_dimensions(
    self,
):
    # will_replace_referenced_dimension=False: the expected SQL has a separate
    # set column placed ahead of the untouched "text" dimension.
    set_filter = f.ResultSet(
        ds.fields.text == "abc",
        will_replace_referenced_dimension=False,
    )

    builder = ds.query.widget(f.Pandas(ds.fields.aggr_number))
    builder = builder.dimension(ds.fields.text)
    sql_queries = builder.filter(set_filter).sql

    self.assertEqual(len(sql_queries), 1)

    # Alias list shared by the GROUP BY and ORDER BY clauses.
    dimension_aliases = '"$set(text=\'abc\')","$text" '
    expected_sql = "".join(
        (
            "SELECT ",
            "CASE WHEN \"text\"='abc' THEN 'set(text=''abc'')' ELSE 'complement(text=''abc'')' END \"$set(text='abc')\",",
            '"text" "$text",',
            'SUM("number") "$aggr_number" ',
            'FROM "test" ',
            "GROUP BY ",
            dimension_aliases,
            "ORDER BY ",
            dimension_aliases,
            "LIMIT 200000",
        )
    )
    self.assertEqual(expected_sql, str(sql_queries[0]))
def test_apply_set_filter_for_dimension_that_is_also_being_fetched_in_both_dataset_queries(
    self,
):
    # "candidate-id" is both the selected dimension and the set filter target;
    # the expected SQL applies the same CASE expression in both sub-queries.
    builder = mock_dataset_blender.query()
    builder = builder.widget(
        f.ReactTable(mock_dataset_blender.fields["candidate-spend-per-wins"])
    )
    builder = builder.dimension(mock_dataset_blender.fields['candidate-id'])
    builder = builder.filter(
        f.ResultSet(mock_dataset_blender.fields['candidate-id'] == 12)
    )
    sql_queries = builder.sql

    self.assertEqual(len(sql_queries), 1)

    set_case = (
        'CASE WHEN "candidate_id"=12 THEN \'set(candidate_id=12)\' '
        'ELSE \'complement(candidate_id=12)\' END "$candidate-id",'
    )
    group_by_candidate = 'GROUP BY "$candidate-id"'
    expected_sql = "".join(
        (
            "SELECT ",
            '"sq0"."$candidate-id" "$candidate-id",',
            '"sq1"."$candidate-spend"/"sq0"."$wins" "$candidate-spend-per-wins" ',
            "FROM (",
            "SELECT ",
            set_case,
            'SUM("is_winner") "$wins" ',
            'FROM "politics"."politician" ',
            group_by_candidate,
            ') "sq0" ',
            "LEFT JOIN (",
            "SELECT ",
            set_case,
            'SUM("candidate_spend") "$candidate-spend" ',
            'FROM "politics"."politician_spend" ',
            group_by_candidate,
            ') "sq1" ',
            "ON ",
            '"sq0"."$candidate-id"="sq1"."$candidate-id" ',
            'ORDER BY "$candidate-id" ',
            "LIMIT 200000",
        )
    )
    self.assertEqual(expected_sql, str(sql_queries[0]))
def test_dimension_with_dimension_modifier_is_replaced_by_default_when_result_set_filter_is_present(
    self,
):
    # A rolled-up "boolean" dimension combined with a set filter on the same
    # field: two queries are expected, the base one and the totals one.
    builder = ds.query.widget(f.Pandas(ds.fields.aggr_number))
    builder = builder.dimension(ds.fields.date)
    builder = builder.dimension(f.Rollup(ds.fields.boolean))
    # `== True` is intentional: the comparison result is handed to ResultSet
    # as the filter condition, so it must not be rewritten as `is True`.
    builder = builder.filter(f.ResultSet(ds.fields.boolean == True))
    sql_queries = builder.sql

    self.assertEqual(len(sql_queries), 2)

    expectations = (
        (
            'base query is the same as without totals',
            "SELECT "
            '"date" "$date",'
            "CASE WHEN \"boolean\"=true THEN 'set(boolean=true)' ELSE 'complement(boolean=true)' END \"$boolean\","
            'SUM("number") "$aggr_number" '
            'FROM "test" '
            'GROUP BY "$date","$boolean" '
            'ORDER BY "$date","$boolean" '
            'LIMIT 200000',
        ),
        (
            'totals dimension is replaced with _FIREANT_ROLLUP_VALUE_',
            "SELECT "
            '"date" "$date",'
            '\'_FIREANT_ROLLUP_VALUE_\' "$boolean",'
            'SUM("number") "$aggr_number" '
            'FROM "test" '
            'GROUP BY "$date" '
            'ORDER BY "$date","$boolean" '
            'LIMIT 200000',
        ),
    )
    # One sub-test per generated query, in the order the queries are returned.
    for position, (label, expected_sql) in enumerate(expectations):
        with self.subTest(label):
            self.assertEqual(expected_sql, str(sql_queries[position]))
def test_deeply_nested_dimension_filter_with_sets_for_data_blending(self):
    # The set condition is a LIKE over nested function calls; the expected SQL
    # embeds str() of that same filter inside the CASE expression.
    alias = 'state'
    fields = mock_dataset_blender.fields

    # Assemble the nested LIKE pattern piece by piece.
    leading_part = fn.Upper(fn.Trim(fn.Concat('%ab', fields['candidate-id'])))
    trailing_part = fn.Concat(fields.timestamp.between('date1', 'date2'), 'c%')
    nested_filter = fields.state.like(
        fn.Concat(leading_part, fields.winner, trailing_part)
    )

    builder = mock_dataset_blender.query.widget(
        f.Pandas(mock_dataset_blender.fields['candidate-spend'])
    )
    builder = builder.dimension(mock_dataset_blender.fields[alias])
    builder = builder.filter(
        f.ResultSet(nested_filter, set_label='set_A', complement_label='set_B')
    )
    sql_queries = builder.sql

    self.assertEqual(len(sql_queries), 1)

    expected_sql = "".join(
        (
            "SELECT ",
            f'"sq0"."${alias}" "${alias}",',
            '"sq0"."$candidate-spend" "$candidate-spend" ',
            'FROM (',
            'SELECT ',
            f'CASE WHEN {nested_filter} THEN \'set_A\' ELSE \'set_B\' END "${alias}",',
            'SUM("candidate_spend") "$candidate-spend" ',
            'FROM "politics"."politician_spend" ',
            f'GROUP BY "${alias}"',
            ') "sq0" ',
            f'ORDER BY "${alias}" ',
            "LIMIT 200000",
        )
    )
    self.assertEqual(expected_sql, str(sql_queries[0]))
def test_deeply_nested_dimension_filter_with_sets(self):
    # Single-dataset case of a LIKE filter over nested function calls; the
    # expected CASE condition is str() of the very same filter object.
    alias = 'text'

    # Assemble the nested LIKE pattern piece by piece.
    leading_part = fn.Upper(fn.Trim(fn.Concat('%ab', ds.fields.number)))
    trailing_part = fn.Concat(ds.fields.date.between('date1', 'date2'), 'c%')
    nested_filter = ds.fields.text.like(
        fn.Concat(leading_part, ds.fields.aggr_number, trailing_part)
    )

    builder = ds.query.widget(f.Pandas(ds.fields.aggr_number))
    builder = builder.dimension(ds.fields[alias])
    builder = builder.filter(
        f.ResultSet(nested_filter, set_label='set_A', complement_label='set_B')
    )
    sql_queries = builder.sql

    self.assertEqual(len(sql_queries), 1)

    expected_sql = (
        "SELECT "
        f"CASE WHEN {nested_filter} THEN 'set_A' ELSE 'set_B' END \"${alias}\","
        'SUM("number") "$aggr_number" '
        'FROM "test" '
        f'GROUP BY "${alias}" '
        f'ORDER BY "${alias}" '
        'LIMIT 200000'
    )
    self.assertEqual(expected_sql, str(sql_queries[0]))
def test_dimension_is_replaced_when_references_are_present(self):
    # With a week-over-week reference two queries are expected; both carry the
    # set CASE column after "date" and "boolean". The reference query shifts
    # the date with TIMESTAMPADD and suffixes the metric alias with "_wow".
    builder = ds.query.widget(f.Pandas(ds.fields.aggr_number))
    builder = builder.dimension(ds.fields.date)
    builder = builder.dimension(ds.fields.boolean)
    builder = builder.reference(f.WeekOverWeek(ds.fields.date))
    builder = builder.filter(f.ResultSet(ds.fields.text == "abc"))
    sql_queries = builder.sql

    self.assertEqual(len(sql_queries), 2)

    # Pieces that are identical in the base and the reference statement.
    shared_columns = (
        '"boolean" "$boolean",'
        "CASE WHEN \"text\"='abc' THEN 'set(text=''abc'')' ELSE 'complement(text=''abc'')' END \"$text\","
    )
    shared_tail = (
        'FROM "test" '
        'GROUP BY "$date","$boolean","$text" '
        'ORDER BY "$date","$boolean","$text" '
        'LIMIT 200000'
    )
    expectations = (
        (
            "base query",
            "".join(
                (
                    "SELECT ",
                    '"date" "$date",',
                    shared_columns,
                    'SUM("number") "$aggr_number" ',
                    shared_tail,
                )
            ),
        ),
        (
            "ref query",
            "".join(
                (
                    "SELECT ",
                    'TIMESTAMPADD(week,1,"date") "$date",',
                    shared_columns,
                    'SUM("number") "$aggr_number_wow" ',
                    shared_tail,
                )
            ),
        ),
    )
    for position, (label, expected_sql) in enumerate(expectations):
        with self.subTest(label):
            self.assertEqual(expected_sql, str(sql_queries[position]))
def test_dimension_uses_both_set_and_complement_label_kwargs_when_available(self):
    # Both custom labels are expected verbatim in the THEN / ELSE branches.
    set_filter = f.ResultSet(
        ds.fields.text == "abc",
        set_label="Text is ABC",
        complement_label="Text is NOT ABC",
    )

    builder = ds.query.widget(f.Pandas(ds.fields.aggr_number))
    builder = builder.dimension(ds.fields.text)
    sql_queries = builder.filter(set_filter).sql

    self.assertEqual(len(sql_queries), 1)

    expected_sql = (
        "SELECT "
        "CASE WHEN \"text\"='abc' THEN 'Text is ABC' ELSE 'Text is NOT ABC' END "
        '"$text",'
        'SUM("number") "$aggr_number" '
        'FROM "test" '
        'GROUP BY "$text" '
        'ORDER BY "$text" '
        'LIMIT 200000'
    )
    self.assertEqual(expected_sql, str(sql_queries[0]))
def test_dimension_filter_variations_with_sets(self):
    # Runs the same set-filter scenario once per (dimension alias, filter)
    # pair; each pair becomes its own sub-test labelled with the filter's SQL.
    cases = [
        ('text', ds.fields.text.like("%abc%")),
        ('text', ds.fields.text.not_like("%abc%")),
        ('text', ds.fields.text.isin(["abc"])),
        ('text', ds.fields.text.notin(["abc"])),
        ('date', ds.fields.date.between('date1', 'date2')),
        ('number', ds.fields.number.between(5, 15)),
        ('number', ds.fields.number.isin([1, 2, 3])),
        ('number', ds.fields.number.notin([1, 2, 3])),
    ]

    for alias, condition in cases:
        subtest_label = condition.definition.get_sql(quote_char="")

        with self.subTest(subtest_label):
            builder = ds.query.widget(f.Pandas(ds.fields.aggr_number))
            builder = builder.dimension(ds.fields[alias])
            builder = builder.filter(
                f.ResultSet(condition, set_label='set_A', complement_label='set_B')
            )
            sql_queries = builder.sql

            self.assertEqual(len(sql_queries), 1)

            # The CASE condition is the filter's own string form.
            expected_sql = (
                "SELECT "
                f"CASE WHEN {condition} THEN 'set_A' ELSE 'set_B' END \"${alias}\","
                'SUM("number") "$aggr_number" '
                'FROM "test" '
                f'GROUP BY "${alias}" '
                f'ORDER BY "${alias}" '
                'LIMIT 200000'
            )
            self.assertEqual(expected_sql, str(sql_queries[0]))
def test_dimension_is_inserted_in_dimensions_even_when_not_selected(self):
    # No dimension is selected explicitly; the expected SQL still groups and
    # orders by the set CASE column aliased "$text".
    builder = ds.query.widget(f.Pandas(ds.fields.aggr_number))
    builder = builder.filter(f.ResultSet(ds.fields.text == "abc"))
    sql_queries = builder.sql

    self.assertEqual(len(sql_queries), 1)

    expected_sql = (
        "SELECT "
        "CASE WHEN \"text\"='abc' THEN 'set(text=''abc'')' ELSE 'complement(text=''abc'')' END \"$text\","
        'SUM("number") "$aggr_number" '
        'FROM "test" '
        'GROUP BY "$text" '
        'ORDER BY "$text" '
        'LIMIT 200000'
    )
    self.assertEqual(expected_sql, str(sql_queries[0]))
def test_dimension_filter_variations_with_sets_for_data_blending(self):
    # Blended-dataset variant: one sub-test per (dimension alias, filter) pair,
    # each expecting the same CASE expression in both sub-queries.
    cases = [
        ('state', mock_dataset_blender.fields.state.like("%abc%")),
        ('state', mock_dataset_blender.fields.state.not_like("%abc%")),
        ('state', mock_dataset_blender.fields.state.isin(["abc"])),
        ('state', mock_dataset_blender.fields.state.notin(["abc"])),
        ('timestamp', mock_dataset_blender.fields.timestamp.between('date1', 'date2')),
        ('candidate-id', mock_dataset_blender.fields['candidate-id'].between(5, 15)),
        ('candidate-id', mock_dataset_blender.fields['candidate-id'].isin([1, 2, 3])),
        ('candidate-id', mock_dataset_blender.fields['candidate-id'].notin([1, 2, 3])),
    ]

    for alias, condition in cases:
        # Follow the chain of `.definition` attributes down to the innermost
        # object, which is the one asked for the SQL used as sub-test label.
        innermost = condition.definition
        while hasattr(innermost, 'definition'):
            innermost = innermost.definition
        subtest_label = innermost.get_sql(quote_char="")

        with self.subTest(subtest_label):
            builder = mock_dataset_blender.query.widget(
                f.Pandas(mock_dataset_blender.fields['candidate-spend'])
            )
            builder = builder.dimension(mock_dataset_blender.fields[alias])
            builder = builder.filter(
                f.ResultSet(condition, set_label='set_A', complement_label='set_B')
            )
            sql_queries = builder.sql

            self.assertEqual(len(sql_queries), 1)

            expected_sql = "".join(
                (
                    "SELECT ",
                    f'"sq0"."${alias}" "${alias}",',
                    '"sq1"."$candidate-spend" "$candidate-spend" ',
                    'FROM (',
                    'SELECT ',
                    f'CASE WHEN {condition} THEN \'set_A\' ELSE \'set_B\' END "${alias}" ',
                    'FROM "politics"."politician" ',
                    f'GROUP BY "${alias}"',
                    ') "sq0" LEFT JOIN (',
                    'SELECT ',
                    f'CASE WHEN {condition} THEN \'set_A\' ELSE \'set_B\' END "${alias}",',
                    'SUM("candidate_spend") "$candidate-spend" ',
                    'FROM "politics"."politician_spend" ',
                    f'GROUP BY "${alias}"',
                    f') "sq1" ON "sq0"."${alias}"="sq1"."${alias}" ',
                    f'ORDER BY "${alias}" ',
                    "LIMIT 200000",
                )
            )
            self.assertEqual(expected_sql, str(sql_queries[0]))
def test_no_metric_is_removed_when_result_set_metric_filter_is_present(self):
    # A set filter on the metric itself: the expected SQL keeps SUM("number")
    # as a metric column and adds the CASE set column in front of it.
    metric_filter = f.ResultSet(ds.fields.aggr_number > 10)

    builder = ds.query.widget(f.Pandas(ds.fields.aggr_number))
    sql_queries = builder.filter(metric_filter).sql

    self.assertEqual(len(sql_queries), 1)

    expected_sql = (
        "SELECT "
        "CASE WHEN SUM(\"number\")>10 THEN 'set(SUM(number)>10)' "
        "ELSE 'complement(SUM(number)>10)' END \"$set(SUM(number)>10)\","
        'SUM("number") "$aggr_number" '
        'FROM "test" '
        "ORDER BY 1 "
        "LIMIT 200000"
    )
    self.assertEqual(expected_sql, str(sql_queries[0]))
def test_builder_dimensions_as_arg_with_a_non_replaced_set_dimension(
    self, mock_fetch_data: Mock, *args
):
    # Fetches with a set filter that does not replace the referenced dimension
    # and checks the third positional argument handed to the mocked fetch:
    # a FieldMatcher over the set dimension followed by the state dimension.
    state_dimension = mock_dataset.fields.state

    widget = f.Widget(mock_dataset.fields.votes)
    # Stub the transform so fetch() never runs real widget rendering.
    widget.transform = Mock()

    set_filter = f.ResultSet(
        state_dimension == 'On',
        will_replace_referenced_dimension=False,
    )

    builder = mock_dataset.query.widget(widget)
    builder = builder.dimension(state_dimension)
    builder.filter(set_filter).fetch()

    expected_set_dimension = _make_set_dimension(set_filter, mock_dataset)
    mock_fetch_data.assert_called_once_with(
        ANY,
        ANY,
        FieldMatcher(expected_set_dimension, state_dimension),
        ANY,
        ANY,
    )
def test_dimension_is_replaced_by_default_when_result_set_filter_is_present(
    self,
):
    # Selecting "text" and set-filtering on it: the expected SQL has a single
    # "$text" column holding the CASE expression instead of the raw column.
    builder = ds.query.widget(f.Pandas(ds.fields.aggr_number))
    builder = builder.dimension(ds.fields.text)
    builder = builder.filter(f.ResultSet(ds.fields.text == "abc"))
    sql_queries = builder.sql

    self.assertEqual(len(sql_queries), 1)

    expected_sql = (
        "SELECT "
        "CASE WHEN \"text\"='abc' THEN 'set(text=''abc'')' ELSE 'complement(text=''abc'')' END \"$text\","
        'SUM("number") "$aggr_number" '
        'FROM "test" '
        'GROUP BY "$text" '
        'ORDER BY "$text" '
        'LIMIT 200000'
    )
    self.assertEqual(expected_sql, str(sql_queries[0]))
def test_apply_set_filter_for_dimension_in_tertiary_dataset_query(self):
    # Three blended datasets: the expected SQL applies the candidate-id set
    # CASE expression in every sub-query and joins sq1 and sq2 to sq0 on it.
    staff_blend = mock_dataset_blender.blend(mock_staff_dataset).on_dimensions()

    builder = staff_blend.query()
    builder = builder.widget(f.ReactTable(staff_blend.fields.num_staff))
    builder = builder.dimension(staff_blend.fields['political_party'])
    builder = builder.filter(f.ResultSet(staff_blend.fields["candidate-id"] == 12))
    sql_queries = builder.sql

    self.assertEqual(len(sql_queries), 1)

    # Same CASE expression in each sub-query; only the separator after it
    # differs (space when it is the last column, comma otherwise).
    set_case = (
        'CASE WHEN "candidate_id"=12 THEN \'set(candidate_id=12)\' '
        'ELSE \'complement(candidate_id=12)\' END "$candidate-id"'
    )
    group_by_candidate = 'GROUP BY "$candidate-id"'
    expected_sql = "".join(
        (
            "SELECT ",
            '"sq0"."$political_party" "$political_party",',
            '"sq0"."$candidate-id" "$candidate-id",',
            '"sq2"."$num_staff" "$num_staff" ',
            "FROM (",
            "SELECT ",
            '"political_party" "$political_party",',
            set_case,
            " ",
            'FROM "politics"."politician" ',
            'GROUP BY "$political_party","$candidate-id"',
            ') "sq0" ',
            "LEFT JOIN (",
            "SELECT ",
            set_case,
            " ",
            'FROM "politics"."politician_spend" ',
            group_by_candidate,
            ') "sq1" ',
            "ON ",
            '"sq0"."$candidate-id"="sq1"."$candidate-id" ',
            "LEFT JOIN (",
            "SELECT ",
            set_case,
            ",",
            'COUNT("staff_id") "$num_staff" ',
            'FROM "politics"."politician_staff" ',
            group_by_candidate,
            ') "sq2" ',
            "ON ",
            '"sq0"."$candidate-id"="sq2"."$candidate-id" ',
            'ORDER BY "$political_party","$candidate-id" ',
            "LIMIT 200000",
        )
    )
    self.assertEqual(expected_sql, str(sql_queries[0]))
def test_dimension_is_inserted_as_last_dimension_when_not_selected(self):
    # "text" is only referenced by the set filter; the expected SQL appends
    # its CASE column after the explicitly selected "date" and "boolean".
    builder = ds.query.widget(f.Pandas(ds.fields.aggr_number))
    builder = builder.dimension(ds.fields.date)
    builder = builder.dimension(ds.fields.boolean)
    builder = builder.filter(f.ResultSet(ds.fields.text == "abc"))
    sql_queries = builder.sql

    self.assertEqual(len(sql_queries), 1)

    # GROUP BY and ORDER BY list the same aliases in the same order.
    dimension_aliases = '"$date","$boolean","$text" '
    expected_sql = "".join(
        (
            "SELECT ",
            '"date" "$date",',
            '"boolean" "$boolean",',
            "CASE WHEN \"text\"='abc' THEN 'set(text=''abc'')' ELSE 'complement(text=''abc'')' END \"$text\",",
            'SUM("number") "$aggr_number" ',
            'FROM "test" ',
            "GROUP BY ",
            dimension_aliases,
            "ORDER BY ",
            dimension_aliases,
            "LIMIT 200000",
        )
    )
    self.assertEqual(expected_sql, str(sql_queries[0]))
def test_apply_set_filter_for_dimension_that_is_also_being_fetched_in_tertiary_dataset_query(self):
    # Only the staff metric is requested and "candidate-id" is both dimension
    # and set filter target; the expected SQL has a single sub-query over
    # "politician_staff".
    staff_blend = mock_dataset_blender.blend(mock_staff_dataset).on_dimensions()

    builder = staff_blend.query()
    builder = builder.widget(f.ReactTable(staff_blend.fields.num_staff))
    builder = builder.dimension(staff_blend.fields['candidate-id'])
    builder = builder.filter(f.ResultSet(staff_blend.fields["candidate-id"] == 12))
    sql_queries = builder.sql

    self.assertEqual(len(sql_queries), 1)

    expected_sql = (
        "SELECT "
        '"sq0"."$candidate-id" "$candidate-id",'
        '"sq0"."$num_staff" "$num_staff" '
        "FROM ("
        "SELECT "
        'CASE WHEN "candidate_id"=12 THEN \'set(candidate_id=12)\' ELSE \'complement(candidate_id=12)\' END "$candidate-id",'
        'COUNT("staff_id") "$num_staff" '
        'FROM "politics"."politician_staff" '
        'GROUP BY "$candidate-id"'
        ') "sq0" '
        'ORDER BY "$candidate-id" '
        "LIMIT 200000"
    )
    self.assertEqual(expected_sql, str(sql_queries[0]))
def test_apply_set_filter_for_dimension_with_reference_in_both_dataset_queries(
    self,
):
    # Blended query with a week-over-week reference plus a candidate-id set
    # filter: two statements are expected. The reference statement differs
    # from the base one in the shifted timestamp column and "_wow" aliases.
    builder = mock_dataset_blender.query()
    builder = builder.widget(
        f.ReactTable(mock_dataset_blender.fields["candidate-spend-per-wins"])
    )
    builder = builder.dimension(f.day(mock_dataset_blender.fields.timestamp))
    builder = builder.reference(
        f.WeekOverWeek(mock_dataset_blender.fields.timestamp)
    )
    builder = builder.filter(
        f.ResultSet(mock_dataset_blender.fields['candidate-id'] == 12)
    )
    sql_queries = builder.sql

    self.assertEqual(len(sql_queries), 2)

    # Fragments common to both expected statements.
    outer_dimensions = (
        '"sq0"."$timestamp" "$timestamp",'
        '"sq0"."$candidate-id" "$candidate-id",'
    )
    set_case = (
        'CASE WHEN "candidate_id"=12 THEN \'set(candidate_id=12)\' '
        'ELSE \'complement(candidate_id=12)\' END "$candidate-id",'
    )
    group_by = 'GROUP BY "$timestamp","$candidate-id"'
    join_and_tail = (
        "ON "
        '"sq0"."$timestamp"="sq1"."$timestamp" '
        'AND "sq0"."$candidate-id"="sq1"."$candidate-id" '
        'ORDER BY "$timestamp","$candidate-id" '
        'LIMIT 200000'
    )

    # Timestamp column as it appears in the base and in the reference query.
    base_day = 'TRUNC("timestamp",\'DD\') "$timestamp",'
    shifted_day = 'TRUNC(TIMESTAMPADD(week,1,TRUNC("timestamp",\'DD\')),\'DD\') "$timestamp",'

    expectations = (
        (
            "base query",
            "".join(
                (
                    "SELECT ",
                    outer_dimensions,
                    '"sq1"."$candidate-spend"/"sq0"."$wins" "$candidate-spend-per-wins" ',
                    "FROM (",
                    "SELECT ",
                    base_day,
                    set_case,
                    'SUM("is_winner") "$wins" ',
                    'FROM "politics"."politician" ',
                    group_by,
                    ') "sq0" ',
                    "LEFT JOIN (",
                    "SELECT ",
                    base_day,
                    set_case,
                    'SUM("candidate_spend") "$candidate-spend" ',
                    'FROM "politics"."politician_spend" ',
                    group_by,
                    ') "sq1" ',
                    join_and_tail,
                )
            ),
        ),
        (
            "ref query",
            "".join(
                (
                    "SELECT ",
                    outer_dimensions,
                    '"sq1"."$candidate-spend_wow"/"sq0"."$wins_wow" "$candidate-spend-per-wins_wow" ',
                    "FROM (",
                    "SELECT ",
                    shifted_day,
                    set_case,
                    'SUM("is_winner") "$wins_wow" ',
                    'FROM "politics"."politician" ',
                    group_by,
                    ') "sq0" ',
                    "LEFT JOIN (",
                    "SELECT ",
                    shifted_day,
                    set_case,
                    'SUM("candidate_spend") "$candidate-spend_wow" ',
                    'FROM "politics"."politician_spend" ',
                    group_by,
                    ') "sq1" ',
                    join_and_tail,
                )
            ),
        ),
    )
    # One sub-test per generated query, in the order the queries are returned.
    for position, (label, expected_sql) in enumerate(expectations):
        with self.subTest(label):
            self.assertEqual(expected_sql, str(sql_queries[position]))