def test_scalar_parameter(db):
    """Date params bound at execute() time match the inline-literal result."""
    start = ibis.param(dt.date)
    end = ibis.param(dt.date)
    col = db.functional_alltypes.date_string_col.cast('date')
    expr = col.between(start, end)
    start_string, end_string = '2009-03-01', '2010-07-03'
    result = expr.execute(params={start: start_string, end: end_string})
    expected = col.between(start_string, end_string).execute()
    tm.assert_series_equal(result, expected)
def test_scalar_parameter(con):
    """Date params bound at execute() time match the inline-literal result."""
    start = ibis.param(dt.date)
    end = ibis.param(dt.date)
    table = con.table('functional_alltypes')
    col = table.date_string_col.cast('date')
    expr = col.between(start, end)
    start_string, end_string = '2009-03-01', '2010-07-03'
    result = expr.execute(params={start: start_string, end: end_string})
    expected = col.between(start_string, end_string).execute()
    tm.assert_series_equal(result, expected)
def test_date_scalar_parameter(backend, alltypes, start_string, end_string):
    """A date range expressed via params equals the same range as literals."""
    start, end = ibis.param(dt.date), ibis.param(dt.date)
    col = alltypes.timestamp_col.date()
    expr = col.between(start, end)
    expected_expr = col.between(start_string, end_string)
    result = expr.execute(params={start: start_string, end: end_string})
    expected = expected_expr.execute()
    backend.assert_series_equal(result, expected)
def test_date_scalar_parameter(
    backend, alltypes, df, start_string, end_string
):
    """A date range expressed via params equals the same range as literals."""
    start, end = ibis.param(dt.date), ibis.param(dt.date)
    col = alltypes.timestamp_col.date()
    expr = col.between(start, end)
    expected_expr = col.between(start_string, end_string)
    result = expr.execute(params={start: start_string, end: end_string})
    expected = expected_expr.execute()
    backend.assert_series_equal(result, expected)
def test_floating_scalar_parameter(backend, alltypes, df, column, raw_value):
    """Adding a double param to a column matches the pandas equivalent."""
    value = ibis.param(dt.double)
    expr = alltypes[column] + value
    expected = df[column] + raw_value
    result = expr.execute(params={value: raw_value})
    expected = backend.default_series_rename(expected)
    backend.assert_series_equal(result, expected, check_dtype=False)
def test_scalar_param_nested(client):
    """A deeply nested struct/array param round-trips through execute()."""
    param = ibis.param('struct<x: array<struct<y: array<double>>>>')
    value = collections.OrderedDict(
        [('x', [collections.OrderedDict([('y', [1.0, 2.0, 3.0])])])]
    )
    result = client.execute(param, {param: value})
    assert value == result
def test_subquery_scalar_params(alltypes, project_id):
    """A timestamp param inside a subquery compiles to a named @ placeholder."""
    t = alltypes
    param = ibis.param('timestamp').name('my_param')
    expr = (
        t[['float_col', 'timestamp_col', 'int_col', 'string_col']][
            lambda t: t.timestamp_col < param
        ]
        .groupby('string_col')
        .aggregate(foo=lambda t: t.float_col.sum())
        .foo.count()
    )
    result = expr.compile(params={param: '20140101'})
    expected = """\
SELECT count(`foo`) AS `count`
FROM (
  SELECT `string_col`, sum(`float_col`) AS `foo`
  FROM (
    SELECT `float_col`, `timestamp_col`, `int_col`, `string_col`
    FROM `{}.testing.functional_alltypes`
    WHERE `timestamp_col` < @my_param
  ) t1
  GROUP BY 1
) t0""".format(project_id)
    assert result == expected
def test_scalar_param_scope(alltypes): t = alltypes param = ibis.param("timestamp") mut = t.mutate(param=param).compile(params={param: "2017-01-01"}) assert (mut == """\ SELECT *, @param AS `param` FROM functional_alltypes""")
def test_scalar_param_partition_time(parted_alltypes):
    """Filtering PARTITIONTIME by a timestamp param executes and is empty."""
    assert 'PARTITIONTIME' in parted_alltypes.columns
    assert 'PARTITIONTIME' in parted_alltypes.schema()
    param = ibis.param('timestamp').name('time_param')
    expr = parted_alltypes[parted_alltypes.PARTITIONTIME < param]
    df = expr.execute(params={param: '2017-01-01'})
    assert df.empty
def test_window_grouping_key_has_scope(t, df):
    """A param inside a window group_by key is resolved at execute() time."""
    param = ibis.param(dt.string)
    window = ibis.window(group_by=t.dup_strings + param)
    expr = t.plain_int64.mean().over(window)
    result = expr.execute(params={param: "a"})
    expected = df.groupby(df.dup_strings + "a").plain_int64.transform("mean")
    tm.assert_series_equal(result, expected)
def test_subquery_where_location():
    """The param-driven WHERE lands outside the innermost subquery."""
    t = ibis.table(
        [
            ("float_col", "float32"),
            ("timestamp_col", "timestamp"),
            ("int_col", "int32"),
            ("string_col", "string"),
        ],
        name="alltypes",
    )
    param = ibis.param("timestamp").name("my_param")
    expr = (
        t[["float_col", "timestamp_col", "int_col", "string_col"]][
            lambda t: t.timestamp_col < param
        ]
        .groupby("string_col")
        .aggregate(foo=lambda t: t.float_col.sum())
        .foo.count()
    )
    result = Compiler.to_sql(expr, params={param: "20140101"})
    expected = """\
SELECT count(`foo`) AS `count`
FROM (
  SELECT `string_col`, sum(`float_col`) AS `foo`
  FROM (
    SELECT `float_col`, `timestamp_col`, `int_col`, `string_col`
    FROM alltypes
  ) t1
  WHERE `timestamp_col` < '20140101'
  GROUP BY 1
) t0"""
    assert result == expected
def test_scalar_param_scope(alltypes, project_id, dataset_id):
    """A param used in mutate() compiles to an @param placeholder column."""
    t = alltypes
    param = ibis.param("timestamp")
    mut = t.mutate(param=param).compile(params={param: "2017-01-01"})
    expected = """\
SELECT *, @param AS `param`
FROM `{}.{}.functional_alltypes`""".format(project_id, dataset_id)
    assert mut == expected
def test_repr_struct_of_array_of_struct():
    """bigquery_param builds the expected API repr for a nested struct param."""
    param = ibis.param('struct<x: array<struct<y: array<double>>>>')
    param = param.name('foo')
    value = collections.OrderedDict(
        [('x', [collections.OrderedDict([('y', [1.0, 2.0, 3.0])])])]
    )
    result = bigquery_param(param, value)
    expected = {
        'name': 'foo',
        'parameterType': {
            'structTypes': [
                {
                    'name': 'x',
                    'type': {
                        'arrayType': {
                            'structTypes': [
                                {
                                    'name': 'y',
                                    'type': {
                                        'arrayType': {'type': 'FLOAT64'},
                                        'type': 'ARRAY',
                                    },
                                }
                            ],
                            'type': 'STRUCT',
                        },
                        'type': 'ARRAY',
                    },
                }
            ],
            'type': 'STRUCT',
        },
        'parameterValue': {
            'structValues': {
                'x': {
                    'arrayValues': [
                        {
                            'structValues': {
                                'y': {
                                    'arrayValues': [
                                        {'value': 1.0},
                                        {'value': 2.0},
                                        {'value': 3.0},
                                    ]
                                }
                            }
                        }
                    ]
                }
            }
        },
    }
    assert result.to_api_repr() == expected
def test_scalar_param_scope(alltypes, project_id):
    """A param used in mutate() compiles to an @param placeholder column."""
    t = alltypes
    param = ibis.param('timestamp')
    mut = t.mutate(param=param).compile(params={param: '2017-01-01'})
    expected = """\
SELECT *, @param AS `param`
FROM `{}.testing.functional_alltypes`""".format(project_id)
    assert mut == expected
def test_scalar_param_struct(backend, con):
    """Field access on a struct-typed param returns the bound field value."""
    value = collections.OrderedDict([('a', 1), ('b', 'abc'), ('c', 3.0)])
    struct_type = dt.Struct.from_tuples(
        [('a', 'int64'), ('b', 'string'), ('c', 'float64')]
    )
    param = ibis.param(struct_type)
    result = con.execute(param.a, params={param: value})
    assert result == value['a']
def test_scalar_param_nested(client):
    """A deeply nested struct/array param round-trips through execute()."""
    param = ibis.param('struct<x: array<struct<y: array<double>>>>')
    value = collections.OrderedDict(
        [('x', [collections.OrderedDict([('y', [1.0, 2.0, 3.0])])])]
    )
    result = client.execute(param, {param: value})
    assert value == result
def test_scalar_param_struct(client):
    """A struct-typed param round-trips through execute() (ibis 1.4+)."""
    if IBIS_VERSION < IBIS_1_4_VERSION:
        pytest.skip("requires ibis 1.4+")
    struct_type = dt.Struct.from_tuples([("x", dt.int64), ("y", dt.string)])
    param = ibis.param(struct_type)
    value = collections.OrderedDict([("x", 1), ("y", "foobar")])
    result = client.execute(param, {param: value})
    assert value == result
def test_repr_struct_of_array_of_struct():
    """bigquery_param builds the expected API repr for a nested struct param."""
    param = ibis.param("struct<x: array<struct<y: array<double>>>>")
    param = param.name("foo")
    value = collections.OrderedDict(
        [("x", [collections.OrderedDict([("y", [1.0, 2.0, 3.0])])])]
    )
    result = bigquery_param(param, value)
    expected = {
        "name": "foo",
        "parameterType": {
            "structTypes": [
                {
                    "name": "x",
                    "type": {
                        "arrayType": {
                            "structTypes": [
                                {
                                    "name": "y",
                                    "type": {
                                        "arrayType": {"type": "FLOAT64"},
                                        "type": "ARRAY",
                                    },
                                }
                            ],
                            "type": "STRUCT",
                        },
                        "type": "ARRAY",
                    },
                }
            ],
            "type": "STRUCT",
        },
        "parameterValue": {
            "structValues": {
                "x": {
                    "arrayValues": [
                        {
                            "structValues": {
                                "y": {
                                    "arrayValues": [
                                        {"value": 1.0},
                                        {"value": 2.0},
                                        {"value": 3.0},
                                    ]
                                }
                            }
                        }
                    ]
                }
            }
        },
    }
    assert result.to_api_repr() == expected
def test_scalar_param_partition_time(parted_alltypes):
    """Filtering PARTITIONTIME by a timestamp param executes (ibis 1.4+)."""
    if IBIS_VERSION < IBIS_1_4_VERSION:
        pytest.skip("requires ibis 1.4+")
    assert "PARTITIONTIME" in parted_alltypes.columns
    assert "PARTITIONTIME" in parted_alltypes.schema()
    param = ibis.param("timestamp").name("time_param")
    expr = parted_alltypes[parted_alltypes.PARTITIONTIME < param]
    df = expr.execute(params={param: "2017-01-01"})
    assert df.empty
def test_scalar_param_struct(backend, con):
    """Field access on a struct-typed param returns the bound field value."""
    value = collections.OrderedDict([('a', 1), ('b', 'abc'), ('c', 3.0)])
    struct_type = dt.Struct.from_tuples(
        [('a', 'int64'), ('b', 'string'), ('c', 'float64')]
    )
    param = ibis.param(struct_type)
    result = con.execute(param.a, params={param: value})
    assert result == value['a']
def test_scalar_param_array(alltypes, df):
    """Concatenating an array param onto collect() matches pandas (ibis 1.4+)."""
    if IBIS_VERSION < IBIS_1_4_VERSION:
        pytest.skip("requires ibis 1.4+")
    param = ibis.param("array<double>")
    expr = alltypes.sort_by("id").limit(1).double_col.collect() + param
    result = expr.execute(params={param: [1]})
    expected = [df.sort_values("id").double_col.iat[0]] + [1.0]
    # Compare element-wise as floats; backend may return non-float numerics.
    assert all(
        float(result[index]) == expected[index]
        for index in range(len(expected))
    )
def test_timestamp_accepts_date_literals(alltypes):
    """Binding a date string to a timestamp param still compiles cleanly."""
    date_string = '2009-03-01'
    param = ibis.param(dt.timestamp, name='param')
    expr = alltypes.mutate(param=param)
    params = {param: date_string}
    result = expr.compile(params=params)
    expected = """\
SELECT *, @param AS `param`
FROM testing.functional_alltypes"""
    assert result == expected
def test_timestamp_accepts_date_literals(alltypes):
    """Binding a date string to a timestamp param still compiles cleanly.

    Fix: the expected SQL was written as an f-string with no placeholders
    (extraneous ``f`` prefix, flake8 F541); use a plain string literal.
    """
    date_string = "2009-03-01"
    param = ibis.param(dt.timestamp).name("param_0")
    expr = alltypes.mutate(param=param)
    params = {param: date_string}
    result = expr.compile(params=params)
    expected = """\
SELECT *, @param AS `param`
FROM functional_alltypes"""
    assert result == expected
def test_timestamp_accepts_date_literals(alltypes, project_id):
    """Binding a date string to a timestamp param still compiles cleanly."""
    date_string = '2009-03-01'
    param = ibis.param(dt.timestamp).name('param_0')
    expr = alltypes.mutate(param=param)
    params = {param: date_string}
    result = expr.compile(params=params)
    expected = f"""\
SELECT *, @param AS `param`
FROM `{project_id}.testing.functional_alltypes`"""
    assert result == expected
def test_timestamp_accepts_date_literals(backend, alltypes):
    """The param object embedded in the expression is the one in the dict."""
    date_string = '2009-03-01'
    param = ibis.param(dt.timestamp, name='param')
    expr = alltypes.mutate(param=param)
    params = {param: date_string}
    # The mutated column's underlying op holds the parameter expression.
    param_in_expr = expr.op().args[1][-1]
    assert param_in_expr in params
    assert param_in_expr.equals(param)
def test_scalar_param_nested(client):
    """Nested struct/array param round-trips element-wise (ibis 1.4+)."""
    if IBIS_VERSION < IBIS_1_4_VERSION:
        pytest.skip("requires ibis 1.4+")
    param = ibis.param("struct<x: array<struct<y: array<double>>>>")
    value = collections.OrderedDict(
        [("x", [collections.OrderedDict([("y", [1.0, 2.0, 3.0])])])]
    )
    result = client.execute(param, {param: value})
    # Compare the inner float arrays value by value.
    assert all(
        float(res) == float(val)
        for res, val in zip(value["x"][0]["y"], result["x"][0]["y"])
    )
def test_scalar_param_boolean(alltypes, df):
    """Filtering on a boolean param matches the pandas boolean filter."""
    param = ibis.param('boolean')
    expr = alltypes[(alltypes.string_col.cast('int64') == 0) == param]
    bool_value = True
    result = (
        expr.execute(params={param: bool_value})
        .sort_values('id')
        .reset_index(drop=True)
    )
    expected = (
        df.loc[df.string_col.astype('int64') == 0]
        .sort_values('id')
        .reset_index(drop=True)
    )
    tm.assert_frame_equal(result, expected)
def test_scalar_param_double(alltypes, df):
    """Filtering on a double param matches the pandas float filter."""
    param = ibis.param('double')
    expr = alltypes[alltypes.string_col.cast('int64').cast('double') == param]
    double_value = 0.0
    result = (
        expr.execute(params={param: double_value})
        .sort_values('id')
        .reset_index(drop=True)
    )
    expected = (
        df.loc[df.string_col.astype('int64').astype('float64') == double_value]
        .sort_values('id')
        .reset_index(drop=True)
    )
    tm.assert_frame_equal(result, expected)
def test_scalar_param_date(alltypes, df, date_value):
    """Filtering on a date param matches pandas on normalized timestamps."""
    param = ibis.param('date')
    expr = alltypes[alltypes.timestamp_col.cast('date') <= param]
    result = (
        expr.execute(params={param: date_value})
        .sort_values('timestamp_col')
        .reset_index(drop=True)
    )
    value = pd.Timestamp(date_value)
    expected = (
        df.loc[df.timestamp_col.dt.normalize() <= value]
        .sort_values('timestamp_col')
        .reset_index(drop=True)
    )
    tm.assert_frame_equal(result, expected)
def test_scalar_param_string(alltypes, df):
    """Filtering on a string param matches the pandas string filter."""
    param = ibis.param('string')
    expr = alltypes[alltypes.string_col == param]
    string_value = '0'
    result = (
        expr.execute(params={param: string_value})
        .sort_values('id')
        .reset_index(drop=True)
    )
    expected = (
        df.loc[df.string_col == string_value]
        .sort_values('id')
        .reset_index(drop=True)
    )
    tm.assert_frame_equal(result, expected)
def test_scalar_param_int64(alltypes, df):
    """Filtering on an int64 param matches the pandas integer filter."""
    param = ibis.param('int64')
    expr = alltypes[alltypes.string_col.cast('int64') == param]
    int64_value = 0
    result = (
        expr.execute(params={param: int64_value})
        .sort_values('id')
        .reset_index(drop=True)
    )
    expected = (
        df.loc[df.string_col.astype('int64') == int64_value]
        .sort_values('id')
        .reset_index(drop=True)
    )
    tm.assert_frame_equal(result, expected)
def test_repr_struct_of_array_of_struct():
    """bigquery_param builds the expected API repr for a nested struct param."""
    param = ibis.param('struct<x: array<struct<y: array<double>>>>').name('foo')
    inner = collections.OrderedDict([('y', [1.0, 2.0, 3.0])])
    value = collections.OrderedDict([('x', [inner])])
    result = bigquery_param(param, value)
    expected = {
        'name': 'foo',
        'parameterType': {
            'structTypes': [
                {
                    'name': 'x',
                    'type': {
                        'arrayType': {
                            'structTypes': [
                                {
                                    'name': 'y',
                                    'type': {
                                        'arrayType': {'type': 'FLOAT64'},
                                        'type': 'ARRAY',
                                    },
                                }
                            ],
                            'type': 'STRUCT',
                        },
                        'type': 'ARRAY',
                    },
                }
            ],
            'type': 'STRUCT',
        },
        'parameterValue': {
            'structValues': {
                'x': {
                    'arrayValues': [
                        {
                            'structValues': {
                                'y': {
                                    'arrayValues': [
                                        {'value': 1.0},
                                        {'value': 2.0},
                                        {'value': 3.0},
                                    ]
                                }
                            }
                        }
                    ]
                }
            }
        },
    }
    assert result.to_api_repr() == expected
def test_scalar_param_scope(alltypes, project_id):
    """A param used in mutate() compiles to an @param placeholder column."""
    t = alltypes
    param = ibis.param('timestamp')
    mut = t.mutate(param=param).compile(params={param: '2017-01-01'})
    expected = """\
SELECT *, @param AS `param`
FROM `{}.testing.functional_alltypes`""".format(project_id)
    assert mut == expected
def test_scalar_param_date(alltypes, df, date_value):
    """Filtering on a date param matches pandas (UTC-localized comparison)."""
    param = ibis.param("date")
    expr = alltypes[alltypes.timestamp_col.cast("date") <= param]
    result = (
        expr.execute(params={param: date_value})
        .sort_values("timestamp_col")
        .reset_index(drop=True)
    )
    value = pd.Timestamp(date_value)
    # The backend returns tz-aware timestamps, so localize before comparing.
    value = pd.to_datetime(value).tz_localize("UTC")
    expected = (
        df.loc[df.timestamp_col.dt.normalize() <= value]
        .sort_values("timestamp_col")
        .reset_index(drop=True)
    )
    tm.assert_frame_equal(result, expected)
def test_scalar_param_timestamp(alltypes, df, timestamp_value):
    """Filtering on a timestamp param matches the pandas timestamp filter."""
    param = ibis.param('timestamp')
    expr = alltypes[alltypes.timestamp_col <= param][['timestamp_col']]
    result = (
        expr.execute(params={param: timestamp_value})
        .sort_values('timestamp_col')
        .reset_index(drop=True)
    )
    value = pd.Timestamp(timestamp_value)
    expected = (
        df.loc[df.timestamp_col <= value, ['timestamp_col']]
        .sort_values('timestamp_col')
        .reset_index(drop=True)
    )
    tm.assert_frame_equal(result, expected)
def test_timestamp_accepts_date_literals(alltypes, project_id):
    """Binding a date string to a timestamp param still compiles cleanly."""
    date_string = '2009-03-01'
    param = ibis.param(dt.timestamp).name('param_0')
    expr = alltypes.mutate(param=param)
    params = {param: date_string}
    result = expr.compile(params=params)
    expected = """\
SELECT *, @param AS `param`
FROM `{}.testing.functional_alltypes`""".format(project_id)
    assert result == expected
def test_scalar_param_boolean(alltypes, df):
    """Filtering bool_col by a boolean param matches the pandas filter."""
    param = ibis.param("boolean")
    expr = alltypes[alltypes.bool_col == param]
    bool_value = True
    result = (
        expr.execute(params={param: bool_value})
        .sort_values("id")
        .reset_index(drop=True)
    )
    expected = (
        df.loc[df.bool_col == bool_value]
        .sort_values("id")
        .reset_index(drop=True)
    )
    tm.assert_frame_equal(result, expected)
def test_scalar_param_string(alltypes, df):
    """Filtering on a string param matches the pandas string filter."""
    param = ibis.param('string')
    string_value = '0'
    expr = alltypes[alltypes.string_col == param]
    result = expr.execute(params={param: string_value})
    result = result.sort_values('id').reset_index(drop=True)
    expected = df.loc[df.string_col == string_value]
    expected = expected.sort_values('id').reset_index(drop=True)
    tm.assert_frame_equal(result, expected)
def test_scalar_param_double(alltypes, df):
    """Filtering on a double param matches the pandas float filter."""
    param = ibis.param('double')
    double_value = 0.0
    expr = alltypes[alltypes.string_col.cast('int64').cast('double') == param]
    result = expr.execute(params={param: double_value})
    result = result.sort_values('id').reset_index(drop=True)
    mask = df.string_col.astype('int64').astype('float64') == double_value
    expected = df.loc[mask].sort_values('id').reset_index(drop=True)
    tm.assert_frame_equal(result, expected)
def test_scalar_param_boolean(alltypes, df):
    """Filtering on a boolean param matches the pandas boolean filter."""
    param = ibis.param('boolean')
    bool_value = True
    expr = alltypes[(alltypes.string_col.cast('int64') == 0) == param]
    result = expr.execute(params={param: bool_value})
    result = result.sort_values('id').reset_index(drop=True)
    expected = df.loc[df.string_col.astype('int64') == 0]
    expected = expected.sort_values('id').reset_index(drop=True)
    tm.assert_frame_equal(result, expected)
def test_scalar_param_timestamp(alltypes, df, timestamp_value):
    """Filtering on a timestamp param matches the pandas timestamp filter."""
    param = ibis.param('timestamp')
    expr = alltypes[alltypes.timestamp_col <= param][['timestamp_col']]
    result = expr.execute(params={param: timestamp_value})
    result = result.sort_values('timestamp_col').reset_index(drop=True)
    value = pd.Timestamp(timestamp_value)
    expected = df.loc[df.timestamp_col <= value, ['timestamp_col']]
    expected = expected.sort_values('timestamp_col').reset_index(drop=True)
    tm.assert_frame_equal(result, expected)
def test_scalar_param_date(alltypes, df, date_value):
    """Filtering on a date param matches pandas on normalized timestamps."""
    param = ibis.param('date')
    expr = alltypes[alltypes.timestamp_col.cast('date') <= param]
    result = expr.execute(params={param: date_value})
    result = result.sort_values('timestamp_col').reset_index(drop=True)
    value = pd.Timestamp(date_value)
    expected = df.loc[df.timestamp_col.dt.normalize() <= value]
    expected = expected.sort_values('timestamp_col').reset_index(drop=True)
    tm.assert_frame_equal(result, expected)
def test_timestamp_accepts_date_literals(backend, alltypes):
    """Compilation succeeds when a date string is bound to a timestamp param."""
    date_string = '2009-03-01'
    param = ibis.param(dt.timestamp)
    expr = alltypes.mutate(param=param)
    params = {param: date_string}
    assert expr.compile(params=params) is not None
def test_scalar_param_array(backend, con):
    """length() of an array-typed param returns the bound list's length."""
    value = [1, 2, 3]
    param = ibis.param(dt.Array(dt.int64))
    result = con.execute(param.length(), params={param: value})
    assert result == len(value)
def test_scalar_param_struct(client):
    """A struct-typed param round-trips through execute()."""
    struct_type = dt.Struct.from_tuples([('x', dt.int64), ('y', dt.string)])
    param = ibis.param(struct_type)
    value = collections.OrderedDict([('x', 1), ('y', 'foobar')])
    result = client.execute(param, {param: value})
    assert value == result
def test_scalar_param_map(backend, con):
    """Key lookup on a map-typed param returns the bound value."""
    value = {'a': 'ghi', 'b': 'def', 'c': 'abc'}
    param = ibis.param(dt.Map(dt.string, dt.string))
    result = con.execute(param['b'], params={param: value})
    assert result == value['b']
def test_scalar_parameter(t, df, raw_value):
    """Equality against a double param matches the pandas comparison."""
    value = ibis.param(dt.double)
    expr = t.float64_with_zeros == value
    result = expr.execute(params={value: raw_value})
    expected = df.float64_with_zeros == raw_value
    tm.assert_series_equal(result, expected)
def test_scalar_parameter_repr():
    """Named params and their ops have stable, readable reprs."""
    value = ibis.param(dt.timestamp).name('value')
    assert repr(value) == 'value = ScalarParameter[timestamp]'
    assert repr(value.op()) == "ScalarParameter(type=timestamp)"
def test_scalar_parameter_set():
    """A Python set of dtypes produces a Set-typed scalar parameter."""
    value = ibis.param({dt.int64})
    assert isinstance(value.op(), ops.ScalarParameter)
    assert value.type().equals(dt.Set(dt.int64))
def test_scalar_parameter_repr():
    """Named params and their ops have stable, readable reprs.

    Fix: the source line carried a truncated ``@pytest.mark.parametrize``
    fragment after this function (unclosed call/list, no decorated function),
    which is a SyntaxError; the unparseable fragment was removed.
    NOTE(review): restore the full parametrized equality test from upstream
    if the missing decorated function can be recovered.
    """
    value = ibis.param(dt.timestamp).name('value')
    assert repr(value) == 'value = ScalarParameter[timestamp]'
    value_op = value.op()
    assert repr(value_op) == "ScalarParameter(type=timestamp)"
def test_scalar_param_array(alltypes, df):
    """Concatenating an array param onto collect() matches pandas."""
    param = ibis.param('array<double>')
    expr = alltypes.sort_by('id').limit(1).double_col.collect() + param
    result = expr.execute(params={param: [1]})
    expected = [df.sort_values('id').double_col.iat[0]] + [1.0]
    assert result == expected
def test_scalar_parameter_formatting():
    """str() of a param shows its type, prefixed by its name when set."""
    unnamed = ibis.param('array<date>')
    assert str(unnamed) == 'ScalarParameter[array<date>]'
    named = ibis.param('int64').name('my_param')
    assert str(named) == 'my_param = ScalarParameter[int64]'