def test_normal_rand():
    """Exercise ``NormalRand``: validation, repeatability, sampling, uuid."""
    # A second argument of zero or below must be rejected.
    for bad in (0.0, -1.0):
        with raises(ValueError):
            NormalRand(1.0, bad)

    # Unquantized: ``generate`` is repeatable for the same argument, differs
    # between 0 and 1, and the samples pass a KS test against "norm".
    rv = NormalRand(0.05, 0.2)
    assert rv.generate(0) == rv.generate(0)
    assert rv.generate(0) != rv.generate(1)
    samples = rv.generate_many(100000, 0)
    ks = stats.kstest(samples, "norm", args=(0.05, 0.2))
    assert ks.pvalue > 0.4

    # With q=0.1 the samples inside the window are expected to match the
    # listed values (as judged by ``assert_close``).
    rv = NormalRand(0.05, 0.2, q=0.1)
    assert rv.generate(0) == rv.generate(0)
    assert rv.generate(0) != rv.generate(1)
    in_window = [
        x for x in rv.generate_many(1000, 0) if -0.155 <= x <= 0.255
    ]
    assert_close([-0.15, -0.05, 0.05, 0.15, 0.25], in_window)

    # Same type and arguments share a uuid; a different type does not.
    twin = NormalRand(0.05, 0.2, q=0.1)
    other_type = Rand(0.05, 0.2, q=0.1)
    assert to_uuid(rv) == to_uuid(twin)
    assert to_uuid(rv) != to_uuid(other_type)
def test_randint():
    """Exercise ``RandInt``: validation, bounds, ``q`` steps, log mode, uuid."""
    with raises(ValueError):
        RandInt(0, 10, log=True)  # for log, low>=1.0

    rv = RandInt(10, 20, log=False)
    assert rv.generate(0) == rv.generate(0)
    assert rv.generate(0) != rv.generate(1)
    assert set(range(10, 21)) == set(rv.generate_many(100, 0))

    # (extra keyword arguments, expected set of generated values)
    bounded_cases = [
        (dict(include_high=False), set(range(10, 20))),
        (dict(q=5, include_high=False), {10, 15}),
        (dict(q=5, include_high=True), {10, 15, 20}),
    ]
    for extra, expected in bounded_cases:
        rv = RandInt(10, 20, **extra)
        assert expected == set(rv.generate_many(100, 0))

    rv = RandInt(3, 20, log=True)
    assert rv.generate(0) == rv.generate(0)
    assert rv.generate(0) != rv.generate(1)
    assert set(range(3, 21)) == set(rv.generate_many(1000, 0))

    # Same type and arguments share a uuid; a different type does not.
    first = RandInt(1, 20, q=2)
    twin = RandInt(1, 20, q=2)
    other_type = Rand(1, 20, q=2)
    assert to_uuid(first) == to_uuid(twin)
    assert to_uuid(first) != to_uuid(other_type)
def __init__(self, func: Callable):
    """Wrap ``func`` and derive the unique id of this handler.

    Lambdas all share one generic id; any other callable gets an id
    derived from ``get_full_type_path(func)``.

    :param func: the callable to wrap
    :raises ValueError: if ``func`` is not callable
    """
    super().__init__()
    assert_or_throw(callable(func), lambda: ValueError(func))
    self._func = func
    # ``types.LambdaType`` is only an alias of ``types.FunctionType``, so the
    # isinstance check alone is also true for every ordinary ``def``
    # function. Checking the name keeps the generic id for real lambdas only.
    is_lambda = (
        isinstance(func, LambdaType)
        and getattr(func, "__name__", "") == "<lambda>"
    )
    if is_lambda:
        self._uuid = to_uuid("lambda")
    else:
        self._uuid = to_uuid(get_full_type_path(func))
def test_grid():
    """Exercise ``Grid``: iteration order, equality, uuid, empty input."""
    ab = Grid("a", "b")
    assert ["a", "b"] == list(ab)

    # Value order matters for both equality and uuid.
    ba = Grid("b", "a")
    assert ab == ab
    assert ab != ba
    assert to_uuid(ab) != to_uuid(ba)

    # A grid with no values is rejected.
    with raises(ValueError):
        Grid()
def test_rand():
    """Exercise ``Rand``: validation, ``q`` steps, bounds, log mode, uuid."""

    def check_repeatable(rv):
        # Same argument gives the same value; 0 and 1 give different values.
        assert rv.generate(0) == rv.generate(0)
        assert rv.generate(0) != rv.generate(1)

    # Invalid constructions.
    raises(ValueError, lambda: Rand(1.0, 0.9))
    raises(ValueError, lambda: Rand(1.0, 10, q=-0.1))
    raises(ValueError, lambda: Rand(1.0, 1.0, include_high=False))
    raises(ValueError, lambda: Rand(0.0, 1.0, log=True))  # for log, low>=1.0

    # Equal bounds always produce that single value.
    rv = Rand(0.1, 0.1, q=0.1, log=False)
    assert 0.1 == rv.generate()
    assert 0.1 == rv.generate(10)

    rv = Rand(1.0, 1.0, q=0.1, log=True)
    assert 1.0 == rv.generate()
    assert 1.0 == rv.generate(10)

    # (second positional argument, extra kwargs, exclusive stop of the
    # expected tenths range)
    stepped_cases = [
        (2.0, {}, 21),
        (2.09999, {}, 21),
        (2.0, {"include_high": False}, 20),
        (2.09999, {"include_high": False}, 21),
    ]
    for upper, extra, stop in stepped_cases:
        rv = Rand(1.0, upper, q=0.1, log=False, **extra)
        check_repeatable(rv)
        expected = [x / 10 for x in range(10, stop)]
        assert_close(expected, rv.generate_many(100, 0))

    # Log mode: samples stay within the bounds and their logs pass a KS
    # test against "uniform".
    rv = Rand(0.1, 2.0, log=True, include_high=False)
    check_repeatable(rv)
    samples = rv.generate_many(10000, 0)
    for x in samples:
        assert 0.1 <= x <= 2.0
    log_low = np.log(0.1)
    ks = stats.kstest(
        np.log(samples), "uniform", args=(log_low, np.log(2) - log_low)
    )
    assert ks.pvalue > 0.4

    # Keyword order does not affect the uuid; dropping ``q`` does.
    first = Rand(1.0, 2.0, q=0.1, log=False)
    reordered = Rand(1.0, 2.0, log=False, q=0.1)
    without_q = Rand(1.0, 2.0, log=False)
    assert to_uuid(first) == to_uuid(reordered)
    assert to_uuid(first) != to_uuid(without_q)
def test_rpc_func():
    """Exercise ``RPCFunc``: uuid consistency, invocation, validation."""

    def f1(a: str) -> str:
        return "1"

    wrapped = RPCFunc(f1)
    converted = to_rpc_handler(f1)

    # Both ways of wrapping the same function give the same uuid, and
    # converting an existing handler keeps it.
    assert to_uuid(wrapped) == to_uuid(converted)
    assert to_uuid(wrapped) == to_uuid(to_rpc_handler(wrapped))

    # Calling the handler returns what the wrapped function returns.
    assert "1" == wrapped("x")

    # A non-callable is rejected.
    raises(ValueError, lambda: RPCFunc(1))
def test_named_col():
    """Exercise ``col``: wildcard rules, alias/cast rendering, uuid, alias inference."""
    # Wildcard column: renders as "*", can be neither aliased nor cast.
    assert str(col("*")) == "*"
    assert col("*").wildcard
    assert "" == col("*").infer_alias().output_name
    with raises(ValueError):
        col("*").alias("x")
    with raises(ValueError):
        col("*").cast("long")

    # Plain named column.
    assert str(col("a")) == "a"
    assert not col("a").wildcard
    assert str(col(col("a"))) == "a"

    # Alias and cast rendering.
    assert str(col("ab").alias("xx")) == "ab AS xx"
    assert str(col("ab", "xx").cast(None)) == "ab AS xx"
    assert str(col("ab", "xx").cast("long")) == "CAST(ab AS long) AS xx"
    assert str(col("ab").alias("xx")) == "ab AS xx"
    assert str(col("ab").alias("xx")) == "ab AS xx"
    assert str(col("ab").alias("xx").cast(int)) == "CAST(ab AS long) AS xx"

    with raises(NotImplementedError):
        col([1, 2])

    # uuid depends on name, alias and cast, but not on the order in which
    # alias and cast were applied.
    plain = to_uuid(col("a"))
    assert plain != to_uuid(col("b"))
    assert to_uuid(col("a")) != to_uuid(col("a").alias("v"))
    assert to_uuid(col("a")) != to_uuid(col("a").cast(int))
    cast_then_alias = col("a").cast(int).alias("v")
    alias_then_cast = col("a").alias("v").cast(int)
    assert to_uuid(cast_then_alias) == to_uuid(alias_then_cast)

    # Alias inference.
    assert "" == col("a").infer_alias().as_name
    assert str(col("a").infer_alias()) == "a"
    assert "a" == col("a").cast(int).infer_alias().as_name
    inferred = col("a").cast(int).infer_alias()
    assert str(inferred) == "CAST(a AS long) AS a"
    inferred = col("a").cast(int).alias("x").infer_alias()
    assert str(inferred) == "CAST(a AS long) AS x"
def test_unary_op():
    """Exercise unary operators on columns: rendering, alias inference, uuid."""
    # Arithmetic / bitwise unary operators.
    assert str(-col("a")) == "-(a)"
    assert "a" == (-col("a")).infer_alias().output_name
    assert str(+col("a")) == "a"
    assert str(~col("a")) == "~(a)"

    # Null checks, with and without an alias.
    assert str(col("a").is_null()) == "IS_NULL(a)"
    assert str(col("a").not_null()) == "NOT_NULL(a)"
    assert str(col("a").not_null().alias("xx")) == "NOT_NULL(a) AS xx"
    assert str(col("a").not_null()) == "NOT_NULL(a)"
    assert str(col("a").not_null().alias("xx")) == "NOT_NULL(a) AS xx"

    # Alias inference falls back to the underlying column name.
    assert "a" == col("a").not_null().infer_alias().output_name
    assert str(col("a").not_null().infer_alias()) == "NOT_NULL(a) AS a"

    # Equal expressions share a uuid; different operators do not.
    assert to_uuid(col("a").not_null()) == to_uuid(col("a").not_null())
    assert to_uuid(col("a").not_null()) != to_uuid(col("a").is_null())
def test_func_param():
    """Exercise ``FuncParam``: call, equality, uuid, item assignment."""

    def tf(*args, x, y):
        return sum(args) + x + y

    base = FuncParam(tf, 4, x=1, y=2)
    assert 7 == base()

    # Equality and uuid follow the bound arguments.
    twin = FuncParam(tf, 4, x=1, y=2)
    changed = FuncParam(tf, 5, x=1, y=2)
    assert base == twin
    assert base != changed
    assert to_uuid(base) == to_uuid(twin)
    assert to_uuid(base) != to_uuid(changed)

    # Positional and keyword arguments can be replaced by index / name.
    base[0] = 5
    base["y"] = 3
    assert 5 == base[0]
    assert 3 == base["y"]
    assert 9 == base()
def test_choice():
    """Exercise ``Choice``: validation, repeatability, coverage, JSON output."""
    with raises(ValueError):
        Choice()

    picker = Choice("a", "b", "c")
    assert picker.generate(0) == picker.generate(0)
    assert picker.generate(0) != picker.generate(1)
    assert picker.generate_many(20, 0) == picker.generate_many(20, 0)
    assert picker.generate_many(20, 0) != picker.generate_many(20, 1)
    assert {"a", "b", "c"} == set(picker.generate_many(20, 0))

    # A Choice is distinct from a Grid over the same values.
    assert to_uuid(picker) != to_uuid(Grid("a", "b", "c"))
    assert picker != Grid("a", "b", "c")

    # Generated values survive a JSON round trip.
    picker = Choice(1, 2, 3)
    round_trip = json.loads(json.dumps({"x": picker.generate(0)}))
    assert round_trip["x"] <= 3

    picker = Choice("a", "b", "c")
    round_trip = json.loads(json.dumps({"x": picker.generate(0)}))
    assert isinstance(round_trip["x"], str)
def __uuid__(self) -> str:
    """Return the unique id of this instance.

    The id is built from the type of the instance, ``as_name``,
    ``as_type`` and everything yielded by ``_uuid_keys()``.

    :return: the unique id
    """
    parts = [str(type(self)), self.as_name, self.as_type]
    parts.extend(self._uuid_keys())
    return to_uuid(*parts)
def test_normal_randint():
    """Exercise ``NormalRandInt``: repeatability, value coverage, ``q``, uuid."""
    rv = NormalRandInt(5, 2)
    assert rv.generate(0) == rv.generate(0)
    assert rv.generate(0) != rv.generate(1)
    seen = set(rv.generate_many(50, 0))
    for expected in (3, 4, 5, 6, 7):
        assert expected in seen

    # With q=3 the listed values appear and 6 never does.
    rv = NormalRandInt(5, 2, q=3)
    assert rv.generate(0) == rv.generate(0)
    assert rv.generate(0) != rv.generate(2)
    seen = set(rv.generate_many(50, 0))
    for expected in (-1, 2, 5, 8, 11):
        assert expected in seen
    assert 6 not in seen

    # Same type and arguments share a uuid; a different type does not.
    twin = NormalRandInt(5, 2, q=3)
    other_type = NormalRand(5, 2, q=3)
    assert to_uuid(rv) == to_uuid(twin)
    assert to_uuid(rv) != to_uuid(other_type)
def _to_trail_row(data: Dict[str, Any], metadata: Dict[str, Any]) -> Dict[str, Any]:
    """Replace the pickled parameter list in ``data`` with encoded trials.

    The values of all entries whose name does not start with
    ``TUNE_PREFIX`` (taken in sorted name order) become the trial keys.
    One ``Trial`` is built per unpickled parameter set; trials with the
    same id collapse into one (the last wins).

    :param data: the row to convert; it is modified in place
    :param metadata: metadata attached to every trial
    :return: the same ``data`` object, with ``TUNE_DATASET_TRIALS`` set and
        ``TUNE_DATASET_PARAMS_PREFIX`` removed
    """
    # NOTE(review): ``pickle.loads`` runs on a value read from ``data`` --
    # confirm rows only ever come from a trusted producer.
    partition_names = sorted(
        name for name in data if not name.startswith(TUNE_PREFIX)
    )
    keys = [data[name] for name in partition_names]

    by_id: Dict[str, Trial] = {}
    for params in pickle.loads(data[TUNE_DATASET_PARAMS_PREFIX]):
        trial_id = to_uuid(keys, params)
        by_id[trial_id] = Trial(
            trial_id=trial_id, params=params, metadata=metadata, keys=keys
        )

    data[TUNE_DATASET_TRIALS] = to_base64(list(by_id.values()))
    del data[TUNE_DATASET_PARAMS_PREFIX]
    return data
def _to_trail_row(data: Dict[str, Any], metadata: Dict[str, Any]) -> Dict[str, Any]:
    """Replace the pickled parameter list in ``data`` with JSON trials.

    The values of all entries whose name does not start with
    ``TUNE_PREFIX`` (taken in sorted name order) become the trial keys.
    Each unpickled parameter set is rebuilt as a ``ParamDict`` with its
    items sorted by name before the trial id is computed. Trials with the
    same id collapse into one (the last wins).

    :param data: the row to convert; it is modified in place
    :param metadata: metadata attached to every trial
    :return: the same ``data`` object, with ``TUNE_DATASET_TRIALS`` set to a
        JSON string and ``TUNE_DATASET_PARAMS_PREFIX`` removed
    """
    # NOTE(review): ``pickle.loads`` runs on a value read from ``data`` --
    # confirm rows only ever come from a trusted producer.
    partition_names = sorted(
        name for name in data if not name.startswith(TUNE_PREFIX)
    )
    keys = [data[name] for name in partition_names]

    by_id: Dict[str, Dict[str, Any]] = {}
    for raw_params in pickle.loads(data[TUNE_DATASET_PARAMS_PREFIX]):
        ordered_items = sorted(raw_params.items(), key=lambda item: item[0])
        params = ParamDict(ordered_items)
        trial_id = to_uuid(keys, params)
        trial = Trial(
            trial_id=trial_id, params=params, metadata=metadata, keys=keys
        )
        by_id[trial_id] = trial.jsondict

    data[TUNE_DATASET_TRIALS] = json.dumps(list(by_id.values()))
    del data[TUNE_DATASET_PARAMS_PREFIX]
    return data
def test_template_misc():
    """Exercise template conversion, uuid, ``simple_value`` and grid product."""
    # to_template
    t = to_template(dict(a=1, b=Grid(0, 1)))
    assert isinstance(t, TuningParametersTemplate)
    t2 = to_template(t)
    assert t is t2  # an existing template is returned as is
    t3 = to_template(t.encode())
    assert t == t3
    raises(ValueError, lambda: to_template(123))

    # uuid
    # Sharing one Grid object between two keys gives a different uuid than
    # two equal but separate Grid objects.
    u = Grid(0, 1)
    t1 = make_template(dict(a=1, b=u, c=Grid(0, 1)))
    t2 = make_template(dict(a=1, b=u, c=Grid(0, 1)))
    t3 = make_template(dict(a=1, b=u, c=u))
    t4 = make_template(dict(a=1, b=u, c=u))
    assert to_uuid(t1) == to_uuid(t2)
    assert to_uuid(t2) != to_uuid(t3)
    assert to_uuid(t3) == to_uuid(t4)

    # simple value
    u = Grid(0, 1)
    t1 = make_template(
        dict(a=1, b=u, c=Grid(0, 1), d=FuncParam(lambda x: x + 1, u))
    )
    # A template that still contains a Grid has no simple value.
    raises(ValueError, lambda: t1.simple_value)
    assert [
        dict(a=1, b=0, c=0, d=1),
        dict(a=1, b=0, c=1, d=1),
        dict(a=1, b=1, c=0, d=2),
        dict(a=1, b=1, c=1, d=2),
    ] == list(t1.product_grid())

    t2 = make_template(dict(a=1, b=2))
    # This comparison must be asserted; as a bare expression it checks nothing.
    assert dict(a=1, b=2) == t2.simple_value
    t2 = make_template(dict(a=1, b=FuncParam(lambda x: x + 1, x=2)))
    assert dict(a=1, b=3) == t2.simple_value
def __uuid__(self):
    """The unique id representing this template.

    The id is computed from ``_units`` and ``_template`` the first time it
    is requested (while ``_uuid`` is still the empty string) and stored
    for later calls.
    """
    if self._uuid != "":
        return self._uuid
    self._uuid = to_uuid(self._units, self._template)
    return self._uuid
def __uuid__(self) -> str:
    """Return the id that ``to_uuid`` derives from ``self.jsondict``."""
    payload = self.jsondict
    return to_uuid(payload)
def __uuid__(self) -> str:
    """Return the id derived from the tag ``"grid"`` and ``self._values``."""
    values = self._values
    return to_uuid("grid", values)
def __uuid__(self):
    """Unique id for this collection.

    Derived from ``_distinct`` and ``all_cols``.
    """
    distinct = self._distinct
    columns = self.all_cols
    return to_uuid(distinct, columns)
def test_lit_col():
    """Exercise ``lit`` / ``null``: rendering, null checks, aliases, uuid."""
    # Null literal and its constant-folded null checks.
    assert str(lit(None)) == "NULL"
    assert str(null().is_null()) == "TRUE"
    assert str(null().not_null()) == "FALSE"

    # String literals, including quote and backslash escaping.
    assert str(lit("a")) == "'a'"
    assert str(lit("a\"'\\")) == "'a\"\\'\\\\'"
    assert str(lit("a", "x")) == "'a' AS x"
    assert str(lit("a").not_null()) == "TRUE"
    assert str(lit("a").is_null()) == "FALSE"

    # Numeric and boolean literals.
    assert str(lit(1.1)) == "1.1"
    assert str(lit(11)) == "11"
    assert str(lit(True)) == "TRUE"
    assert str(lit(False)) == "FALSE"

    # Aliased literals.
    assert str(lit(1).alias("xx")) == "1 AS xx"
    assert str(lit("ab").alias("xx")) == "'ab' AS xx"

    with raises(NotImplementedError):
        lit([1, 2])

    # uuid distinguishes literal vs column, value type, alias and cast, but
    # not the order in which alias and cast were applied.
    assert to_uuid(lit("a")) != to_uuid(col("a"))
    assert to_uuid(lit(1)) != to_uuid(lit("1"))
    assert to_uuid(null()) == to_uuid(null())
    assert to_uuid(null()) != to_uuid(lit(1))
    assert to_uuid(lit("a")) != to_uuid(lit("a").alias("v"))
    assert to_uuid(lit("a")) != to_uuid(lit("a").cast(int))
    cast_then_alias = lit("a").cast(int).alias("v")
    alias_then_cast = lit("a").alias("v").cast(int)
    assert to_uuid(cast_then_alias) == to_uuid(alias_then_cast)
def __uuid__(self) -> str:
    """Unique id for this expression.

    Derived from the full type path of ``_func`` together with ``_args``
    and ``_kwargs``.
    """
    func_path = get_full_type_path(self._func)
    return to_uuid(func_path, self._args, self._kwargs)
def test_determinism():
    """Check that handler uuids are stable across equivalent constructions."""

    def _f1(a: str) -> str:
        return "1"

    # Wrapping the same function either way gives the same uuid.
    direct = to_uuid(RPCFunc(_f1))
    converted = to_uuid(to_rpc_handler(_f1))
    assert direct == converted

    # Two different lambdas still produce the same uuid.
    first_lambda = to_uuid(RPCFunc(lambda x: x))
    second_lambda = to_uuid(RPCFunc(lambda x: x + 1))
    assert first_lambda == second_lambda
def __uuid__(self) -> str:
    """Return the id derived from ``self.expr`` and ``self.positions``."""
    expr = self.expr
    positions = self.positions
    return to_uuid(expr, positions)
def _get_judge(self, trial: Trial) -> _PerPartition:
    """Return the ``_PerPartition`` for the trial's keys, creating it if needed.

    Entries in ``self._data`` are indexed by the uuid of ``trial.keys``;
    lookup and creation both happen while holding ``self._lock``.

    :param trial: the trial whose keys select the entry
    :return: the existing or newly created entry
    """
    partition_id = to_uuid(trial.keys)
    with self._lock:
        missing = partition_id not in self._data
        if missing:
            self._data[partition_id] = _PerPartition(self, trial.keys)
        return self._data[partition_id]
def __uuid__(self) -> str:
    """Unique id for the expression, derived from ``self.jsondict``."""
    payload = self.jsondict
    return to_uuid(payload)
def test_select_columns():
    """Exercise ``SelectColumns``: naming rules, wildcard rules, classification."""
    # not all with names
    unnamed = SelectColumns(
        col("a"), lit(1, "b"), col("bb") + col("cc"), f.first(col("c"))
    )
    assert to_uuid(unnamed) == to_uuid(unnamed)
    with raises(ValueError):
        unnamed.assert_all_with_names()

    # distinct
    distinct = SelectColumns(
        col("a"),
        lit(1, "b"),
        col("bb") + col("cc"),
        f.first(col("c")),
        arg_distinct=True,
    )
    assert to_uuid(unnamed) != to_uuid(distinct)

    # duplicated names
    duplicated = SelectColumns(col("a").alias("b"), lit(1, "b"))
    renamed = SelectColumns(col("a").alias("b"), lit(1, "c"))
    assert to_uuid(duplicated) != to_uuid(renamed)
    with raises(ValueError):
        duplicated.assert_all_with_names()

    # with *, all cols must have alias
    with_star = SelectColumns(col("*"), col("a")).assert_no_agg()
    with raises(ValueError):
        with_star.assert_all_with_names()

    # * can be used at most once
    with raises(ValueError):
        SelectColumns(col("*"), col("*"), col("a").alias("p"))

    # * can't be used with aggregation
    with raises(ValueError):
        SelectColumns(col("*"), f.first(col("a")).alias("x"))

    # One column of each kind, all named.
    mixed = SelectColumns(
        col("aa").alias("a").cast(int),
        lit(1, "b"),
        (col("bb") + col("cc")).alias("c"),
        f.first(col("c")).alias("d"),
    ).assert_all_with_names()
    with raises(AssertionError):
        mixed.assert_no_agg()
    assert not mixed.simple
    assert 1 == len(mixed.simple_cols)
    assert str(mixed.simple_cols[0]) == "CAST(aa AS long) AS a"
    assert mixed.has_literals
    assert 1 == len(mixed.literals)
    assert str(mixed.literals[0]) == "1 AS b"
    assert mixed.has_agg
    assert 1 == len(mixed.non_agg_funcs)
    assert str(mixed.non_agg_funcs[0]) == "+(bb,cc) AS c"
    assert 1 == len(mixed.agg_funcs)
    assert str(mixed.agg_funcs[0]) == "FIRST(c) AS d"
    assert 2 == len(mixed.group_keys)  # a, c
    assert "aa" == mixed.group_keys[0].output_name
    assert "" == mixed.group_keys[1].output_name
    assert isinstance(mixed.group_keys[1], _BinaryOpExpr)

    # A single plain column.
    plain = SelectColumns(col("a")).assert_no_wildcard()
    assert plain.simple
    assert not plain.has_literals
    assert not plain.has_agg

    # Wildcard replacement keeps the column that precedes the wildcard first.
    cols = SelectColumns(col("x"), col("*"), col("y") + col("z"))
    cols = cols.replace_wildcard(Schema("a:int,b:int"))
    assert "x" == str(cols.all_cols[0])