Exemple #1
0
def test_complex_type_warning():
    """
    Hashing a ``threading.Lock`` must emit a ``UserWarning`` whose message
    matches "Found a constant", and the resulting hash must equal the hash
    of ``TypePrefix.DEFAULT``.
    """
    lock = threading.Lock()
    expected_warning = pytest.warns(UserWarning, match="Found a constant")
    with expected_warning:
        lock_hash = CodeHasher.hash(lock)
        default_hash = CodeHasher.hash(TypePrefix.DEFAULT)
        assert lock_hash == default_hash
Exemple #2
0
def test_changes_in_another_module(is_module_internal):
    """
    Checks how the hash of a function reacts when a module it references
    is re-imported with different code.

    ``is_module_internal`` is forwarded to ``import_code`` and selects
    which expectation applies: for an internal module the hash of ``f``
    must change when ``f_mod`` changes; for an external module it must
    stay the same.
    """
    f_mod_code = """
    def f_mod():
        return 1
    """
    m = import_code(dedent(f_mod_code), is_module_internal=is_module_internal)

    def f():
        return m.f_mod()

    old_hash = CodeHasher.hash(f)
    # Hashing twice must be stable.
    assert old_hash == CodeHasher.hash(f)

    # Hash for f should not change if we change f_mod when module is
    # external.
    f_mod_code = """
    def f_mod():
        return 2
    """
    # Rebinding ``m`` changes what the closure of ``f`` refers to.
    m = import_code(dedent(f_mod_code), is_module_internal=is_module_internal)

    new_hash = CodeHasher.hash(f)
    assert new_hash == CodeHasher.hash(f)
    if is_module_internal:
        # Internal module code is part of the hash, so the edit is visible.
        assert old_hash != new_hash
    else:
        # External module code is not followed, so the hash is unchanged.
        assert old_hash == new_hash
Exemple #3
0
def test_complex_type_warning():
    """
    Hashing complex objects (a lock and a condition variable) must emit a
    ``UserWarning`` matching "Found a complex object", and both objects
    must hash to the same value.

    When ``True`` is passed as the second positional argument of
    ``CodeHasher.hash`` no warning may be emitted at all (presumably the
    argument suppresses the warning -- confirm against ``CodeHasher``).
    """
    # Local import so this block does not depend on file-level imports.
    import warnings

    lock = threading.Lock()
    cond = threading.Condition()
    with pytest.warns(
            UserWarning,
            match="Found a complex object",
    ):
        assert CodeHasher.hash(lock) == CodeHasher.hash(cond)

    # ``pytest.warns(None)`` is deprecated in pytest 7 and rejected in
    # pytest 8, so record warnings with the standard library instead.
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, even ones already reported once.
        warnings.simplefilter("always")
        assert CodeHasher.hash(lock, True) == CodeHasher.hash(cond, True)
    assert len(caught) == 0
Exemple #4
0
def test_complex_type_warning():
    """
    Hashing a ``threading.Lock`` must emit a ``UserWarning`` matching
    "Found a complex object" and produce the same hash as
    ``TypePrefix.DEFAULT``.

    When ``True`` is passed as the second positional argument of
    ``CodeHasher.hash`` no warning may be emitted at all (presumably the
    argument suppresses the warning -- confirm against ``CodeHasher``).
    """
    # Local import so this block does not depend on file-level imports.
    import warnings

    val = threading.Lock()
    with pytest.warns(
            UserWarning,
            match="Found a complex object",
    ):
        assert CodeHasher.hash(val) == CodeHasher.hash(TypePrefix.DEFAULT)

    # ``pytest.warns(None)`` is deprecated in pytest 7 and rejected in
    # pytest 8, so record warnings with the standard library instead.
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, even ones already reported once.
        warnings.simplefilter("always")
        assert CodeHasher.hash(val, True) == CodeHasher.hash(
            TypePrefix.DEFAULT, True)
    assert len(caught) == 0
Exemple #5
0
def test_changes_in_references():
    """
    The hash of a function must be stable across repeated calls, and must
    change when something the function references changes: first a
    captured variable, then a captured helper function.
    """
    # --- Captured variable -------------------------------------------
    v = 10

    def f():
        return v

    hash_before = CodeHasher.hash(f)
    assert hash_before == CodeHasher.hash(f)

    # Rebinding the captured variable must yield a different hash for f.
    v = 20
    hash_after = CodeHasher.hash(f)
    assert hash_after == CodeHasher.hash(f)
    assert hash_before != hash_after

    # --- Captured helper function ------------------------------------
    def f1():
        return 1

    def count(v):
        if v == 0:
            return 0
        return count(v - 1) + f1()

    hash_before = CodeHasher.hash(count)
    assert hash_before == CodeHasher.hash(count)

    # Redefining f1 in this scope must yield a different hash for count.
    def f1():  # noqa: F811
        return 2

    hash_after = CodeHasher.hash(count)
    assert hash_after == CodeHasher.hash(count)
    assert hash_before != hash_after
Exemple #6
0
def check_hash_equivalence(groups):
    """
    Assert that ``CodeHasher.hash`` partitions ``groups`` correctly.

    Every element is hashed twice, which also verifies stability. All
    hashes within one group must be identical, and the per-group hashes
    must be pairwise distinct across groups.
    """

    distinct_group_hashes = set()
    for members in groups:
        # Two hash calls per member, in member order.
        member_hashes = {
            CodeHasher.hash(member) for member in members for _ in range(2)
        }
        assert len(member_hashes) == 1
        distinct_group_hashes.add(next(iter(member_hashes)))

    assert len(distinct_group_hashes) == len(groups)
Exemple #7
0
def test_code_type_refs_warning():
    """
    Functions that reference raw code objects must trigger a
    ``UserWarning`` matching "Found a complex object" when hashed, and
    two such functions referencing different code objects must hash to
    the same value.

    When ``True`` is passed as the second positional argument of
    ``CodeHasher.hash`` no warning may be emitted at all (presumably the
    argument suppresses the warning -- confirm against ``CodeHasher``).
    """
    # Local import so this block does not depend on file-level imports.
    import warnings

    code1 = import_code.__code__
    code2 = check_hash_equivalence.__code__

    def print_code1():
        logging.info(code1)

    def print_code2():
        logging.info(code2)

    with pytest.warns(
            UserWarning,
            match="Found a complex object",
    ):
        assert CodeHasher.hash(print_code1) == CodeHasher.hash(print_code2)

    # ``pytest.warns(None)`` is deprecated in pytest 7 and rejected in
    # pytest 8, so record warnings with the standard library instead.
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, even ones already reported once.
        warnings.simplefilter("always")
        assert CodeHasher.hash(print_code1,
                               True) == CodeHasher.hash(print_code2, True)
    assert len(caught) == 0
Exemple #8
0
def test_same_func_different_names():
    """
    Two functions with identical bodies but different names must hash to
    the same value, and each one's hash must be stable across calls.
    """
    def f1():
        v = 10
        return v

    def f2():
        v = 10
        return v

    # Stability: hashing the same function twice gives the same result.
    for func in (f1, f2):
        assert CodeHasher.hash(func) == CodeHasher.hash(func)

    # Equivalence: the function name does not influence the hash.
    assert CodeHasher.hash(f1) == CodeHasher.hash(f2)
Exemple #9
0
def test_code_hasher():
    """
    Checks that ``CodeHasher.hash`` is stable and collision-free over a
    broad set of values.

    Every entry of ``values`` and ``values_with_complex_types`` is hashed
    twice; both results must match, and no two entries may share a hash.
    Entries of ``values_with_complex_types`` must additionally emit a
    ``UserWarning`` matching "Found a complex object".

    The nested functions and classes below are hashing fixtures; most are
    never called, so their bodies are intentionally left as-is.
    """
    def barray(value):
        return bytearray(value, "utf8")

    # Two three-element dict cycles that differ only in where the last
    # dict points back to.
    circular_dict_1_a = {"k11": "v1"}
    circular_dict_1_b = {"k21": "v2"}
    circular_dict_1_c = {"k31": "v3"}
    circular_dict_1_a["k12"] = circular_dict_1_b
    circular_dict_1_b["k22"] = circular_dict_1_c
    circular_dict_1_c["k32"] = circular_dict_1_a

    circular_dict_2_a = {"k11": "v1"}
    circular_dict_2_b = {"k21": "v2"}
    circular_dict_2_c = {"k31": "v3"}
    circular_dict_2_a["k12"] = circular_dict_2_b
    circular_dict_2_b["k22"] = circular_dict_2_c
    circular_dict_2_c["k32"] = circular_dict_2_b

    def f1():
        v = 10
        return v

    def f2():
        v = 20
        return v

    def f3():
        v = "10"
        return v

    def f4():
        return "10"

    def g1():
        return global_var_10

    def g2():
        return global_var_20

    free_var_10 = 10
    free_var_20 = 20

    def free1():
        return free_var_10

    def free2():
        return free_var_20

    def fref1():
        return f1()

    def fref2():
        return f2()

    def inc(x):
        return x + 1

    def dec(x):
        return x - 1

    def one():
        return 1

    def inc_with_one(x):
        return x + one()

    def dec_with_one(x):
        return x - one()

    def quadratic_eq(a, b, c):
        d = b**2 - 4 * a * c
        s1 = (b - cmath.sqrt(d)) / (2 * a)
        s2 = (-b - cmath.sqrt(d)) / (2 * a)
        return (s1, s2)

    def logistic_reg(train_frame, random_seed, hyperparams_dict):
        m = linear_model.LogisticRegression(solver="liblinear",
                                            random_state=random_seed,
                                            **hyperparams_dict)
        m.fit(train_frame.drop("target", axis=1), train_frame["target"])
        return m

    def a_lot_of_consts(train_frame, random_seed, hyperparams_dict):
        docstring = """
        This function uses a few constants and demonstrates that Bionic
        can hash all of them without any issues.
        """
        logging.log(docstring)  # Log these variables to avoid F841 errors.
        add_numbers = lambda x, y: x + y  # noqa: E731
        four = add_numbers(2, 2)
        logging.log(four)
        seven = add_numbers(3, 4)
        logging.log(seven)

        a, b, c = (1, -30, 200)
        (s1, s2) = quadratic_eq(a, b, c)
        assert [s1, s2] == [10, 20]

    def f_with_defaults1(x=10, y=20):
        return x + y

    def f_with_defaults2(x=20, y=10):
        return x + y

    def f_with_defaults3(x=10.0, y=20):
        return x + y

    def f_docstring1():
        """Docstring1"""
        pass

    def f_docstring2():
        """Docstring2"""
        pass

    def fib(n):
        return fib(n - 1) + fib(n - 2)

    def nested():
        v = 1

        def inner():
            logging.info(v)
            w = 5

            def innermost():
                logging.info(v, w)

    class ClassDefault:
        v = 1

    class ClassWithInit:
        v = 1

        def __init__(self):
            self.a = 1

    class ClassWithDifferentInit:
        v = 1

        def __init__(self):
            self.a = 2

    class ClassWithInnerClass:
        v = 1

        def __init__(self):
            self.a = 1

        class InnerClass:
            def __init__(self):
                self.i = 1

    class ClassWithDifferentInnerClass:
        v = 1

        def __init__(self):
            self.a = 1

        class InnerClass:
            def __init__(self):
                self.i = 2

    class ClassWithMethod:
        def v(self):
            return 1

    class ClassWithClassMethod1:
        @classmethod
        def v(cls):
            return 10

    class ClassWithClassMethod2:
        @classmethod
        def v(cls):
            return 20

    class ClassWithProperty:
        def __init__(self):
            self._a = 1

        @property
        def a(self):
            return self._a

        @a.setter
        def a(self, value):
            self._a = value

    class ClassWithDiffProperty:
        def __init__(self):
            self._a = 1

        @property
        def a(self):
            return self._a + 1

        @a.setter
        def a(self, value):
            self._a = value

    class ClassWithDynamicAttrLikeProperty:
        def __init__(self):
            self._a = 1

        @property
        def a(self):
            return self._a

    class ClassWithDynamicAttr:
        def __init__(self):
            self._a = 1

        @types.DynamicClassAttribute
        def a(self):
            return self._a

    class ClassWithDiffDynamicAttr:
        def __init__(self):
            self._a = 1

        @types.DynamicClassAttribute
        def a(self):
            return self._a + 1

    @attr.s(frozen=True)
    class AttrClassFrozen:
        a = attr.ib()

    @attr.s
    class AttrClass:
        a = attr.ib()

    @attr.s
    class AttrClassWithDefaults:
        a = attr.ib(default=1)

    @attr.s
    class AttrClassWithMultipleMembers:
        a = attr.ib()
        b = attr.ib()

    @attr.s
    class AttrClassWithMetadata:
        a = attr.ib(metadata={"a": 1})

    # Values expected to hash without any warning.
    values = [
        b"",
        b"123",
        b"None",
        barray("bytearray"),
        barray("anotherbytearray"),
        None,
        ...,
        NotImplemented,
        "",
        "None",
        "String1",
        "String2",
        "0",
        "1",
        "123",
        "1.23",
        "invalid utf8 \udc80",
        0,
        1,
        123,
        23,
        1.23,
        23.0,
        complex(0),
        complex(1),
        complex(123),
        complex(1, 23),
        float("inf"),
        float("-inf"),
        float("nan"),
        True,
        False,
        [],
        [1, 2, 3],
        [1, 2, "3"],
        [1, 2],
        (),
        (1, 2, 3),
        (1, 2, "3"),
        (1, 2),
        {},
        {1, 2, 3},
        {1, 2, "3"},
        {1, 2},
        frozenset(),
        frozenset({1, 2, 3}),
        frozenset({1, 2, "3"}),
        frozenset({1, 2}),
        range(1, 2, 1),
        range(1, 3, 1),
        range(1, 3, 2),
        {
            0: "v1",
            1: None,
            "2": ["value1", "value2"]
        },
        {
            0: "v1",
            1: None,
            "2": ["value1", "value2"],
            None: "none_val"
        },
        {
            0: "v1",
            1: {
                10: "v2",
                20: {
                    100: [200, 300]
                }
            },
            "2": ["value1", "value2"],
            None: "none_val",
        },
        circular_dict_1_a,
        circular_dict_2_a,
        types.MappingProxyType({}),
        types.MappingProxyType({
            0: "v1",
            1: None,
            "2": ["value1", "value2"]
        }),
        f1,
        f2,
        f3,
        f4,
        g1,
        g2,
        free1,
        free2,
        fref1,
        fref2,
        inc,
        dec,
        inc_with_one,
        dec_with_one,
        lambda x: x * 2,
        lambda x: x / 2,
        lambda: None,
        quadratic_eq,
        logistic_reg,
        a_lot_of_consts,
        f_with_defaults1,
        f_with_defaults2,
        f_with_defaults3,
        f_docstring1,
        f_docstring2,
        fib,
        nested,
        ClassDefault,
        ClassWithInit,
        ClassWithDifferentInit,
        ClassWithInnerClass,
        ClassWithDifferentInnerClass,
        ClassWithMethod,
        ClassWithClassMethod1,
        ClassWithClassMethod2,
        ClassWithProperty,
        ClassWithDiffProperty,
        ClassWithDynamicAttrLikeProperty,
        ClassWithDynamicAttr,
        ClassWithDiffDynamicAttr,
        AttrClassFrozen,
        AttrClass,
        AttrClassWithDefaults,
        AttrClassWithMultipleMembers,
        AttrClassWithMetadata,
        TypePrefix,
        TypePrefix.BYTES,
        TypePrefix.BYTEARRAY,
    ]

    # Values expected to emit the "complex object" warning when hashed.
    values_with_complex_types = [
        lambda x=threading.Lock(): x,
        threading.Lock(),
    ]

    # Index into the combined list so the failure message below can also
    # describe entries that come from ``values_with_complex_types``.
    all_values = values + values_with_complex_types

    idx_by_hash_value = {}
    for idx, val in enumerate(all_values):
        if idx >= len(values):
            ctx_mgr = pytest.warns(UserWarning, match="Found a complex object")
        else:
            # ``suppress()`` with no exception types acts as a no-op context.
            ctx_mgr = contextlib.suppress()

        with ctx_mgr:
            hash_value = CodeHasher.hash(val)
            # Hashing again should return the same hash value.
            assert CodeHasher.hash(val) == hash_value
            assert (
                hash_value not in idx_by_hash_value
            ), f"{all_values[idx]} and {all_values[idx_by_hash_value[hash_value]]} have the same hash"
            idx_by_hash_value[hash_value] = idx
Exemple #10
0
def test_code_hasher():
    """
    Checks that ``CodeHasher.hash`` is stable and collision-free over a
    broad set of values.

    Every entry of ``values`` and ``values_with_complex_types`` is hashed
    twice; both results must match, and no two entries may share a hash.
    Entries of ``values_with_complex_types`` must additionally emit a
    ``UserWarning`` matching "Found a constant".

    The nested functions below are hashing fixtures; most are never
    called, so their bodies are intentionally left as-is.
    """
    def barray(value):
        return bytearray(value, "utf8")

    # Two three-element dict cycles that differ only in where the last
    # dict points back to.
    circular_dict_1_a = {"k11": "v1"}
    circular_dict_1_b = {"k21": "v2"}
    circular_dict_1_c = {"k31": "v3"}
    circular_dict_1_a["k12"] = circular_dict_1_b
    circular_dict_1_b["k22"] = circular_dict_1_c
    circular_dict_1_c["k32"] = circular_dict_1_a

    circular_dict_2_a = {"k11": "v1"}
    circular_dict_2_b = {"k21": "v2"}
    circular_dict_2_c = {"k31": "v3"}
    circular_dict_2_a["k12"] = circular_dict_2_b
    circular_dict_2_b["k22"] = circular_dict_2_c
    circular_dict_2_c["k32"] = circular_dict_2_b

    def f1():
        v = 10
        return v

    def f2():
        v = 20
        return v

    def f3():
        v = "10"
        return v

    def f4():
        return "10"

    def inc(x):
        return x + 1

    def dec(x):
        return x - 1

    def quadratic_eq(a, b, c):
        d = b**2 - 4 * a * c
        s1 = (b - cmath.sqrt(d)) / (2 * a)
        s2 = (-b - cmath.sqrt(d)) / (2 * a)
        return (s1, s2)

    def logistic_reg(train_frame, random_seed, hyperparams_dict):
        from sklearn import linear_model

        m = linear_model.LogisticRegression(solver="liblinear",
                                            random_state=random_seed,
                                            **hyperparams_dict)
        m.fit(train_frame.drop("target", axis=1), train_frame["target"])
        return m

    def a_lot_of_consts(train_frame, random_seed, hyperparams_dict):
        import logging

        docstring = """
        This function uses a few constants and demonstrates that Bionic
        can hash all of them without any issues.
        """
        logging.log(docstring)  # Log these variables to avoid F841 errors.
        add_numbers = lambda x, y: x + y  # noqa: E731
        four = add_numbers(2, 2)
        logging.log(four)
        seven = add_numbers(3, 4)
        logging.log(seven)

        a, b, c = (1, -30, 200)
        (s1, s2) = quadratic_eq(a, b, c)
        assert [s1, s2] == [10, 20]

    def f_with_defaults1(x=10, y=20):
        return x + y

    def f_with_defaults2(x=20, y=10):
        return x + y

    def f_with_defaults3(x=10.0, y=20):
        return x + y

    def f_docstring1():
        """Docstring1"""
        pass

    def f_docstring2():
        """Docstring2"""
        pass

    # Values expected to hash without any warning.
    values = [
        b"",
        b"123",
        b"None",
        barray("bytearray"),
        barray("anotherbytearray"),
        None,
        "",
        "None",
        "String1",
        "String2",
        "0",
        "1",
        "123",
        "1.23",
        0,
        1,
        123,
        23,
        1.23,
        23.0,
        float("inf"),
        float("-inf"),
        float("nan"),
        True,
        False,
        [],
        [1, 2, 3],
        [1, 2, "3"],
        [1, 2],
        (),
        (1, 2, 3),
        (1, 2, "3"),
        (1, 2),
        {},
        {1, 2, 3},
        {1, 2, "3"},
        {1, 2},
        {
            0: "v1",
            1: None,
            "2": ["value1", "value2"]
        },
        {
            0: "v1",
            1: None,
            "2": ["value1", "value2"],
            None: "none_val"
        },
        {
            0: "v1",
            1: {
                10: "v2",
                20: {
                    100: [200, 300]
                }
            },
            "2": ["value1", "value2"],
            None: "none_val",
        },
        circular_dict_1_a,
        circular_dict_2_a,
        f1,
        f2,
        f3,
        f4,
        inc,
        dec,
        lambda x: x * 2,
        lambda x: x / 2,
        lambda: None,
        quadratic_eq,
        logistic_reg,
        a_lot_of_consts,
        f_with_defaults1,
        f_with_defaults2,
        f_with_defaults3,
        f_docstring1,
        f_docstring2,
    ]

    # Values expected to emit the "Found a constant" warning when hashed.
    values_with_complex_types = [
        lambda x=threading.Lock(): x,
        threading.Lock(),
    ]

    # Index into the combined list so the failure message below can also
    # describe entries that come from ``values_with_complex_types``.
    all_values = values + values_with_complex_types

    idx_by_hash_value = {}
    for idx, val in enumerate(all_values):
        if idx >= len(values):
            ctx_mgr = pytest.warns(UserWarning, match="Found a constant")
        else:
            # ``suppress()`` with no exception types acts as a no-op context.
            ctx_mgr = contextlib.suppress()

        with ctx_mgr:
            hash_value = CodeHasher.hash(val)
            # Hashing again should return the same hash value.
            assert CodeHasher.hash(val) == hash_value
            assert (
                hash_value not in idx_by_hash_value
            ), f"{all_values[idx]} and {all_values[idx_by_hash_value[hash_value]]} have the same hash"
            idx_by_hash_value[hash_value] = idx