Example #1
def shaped_to_invalid_mix(shaped: Shaped) -> Strategy[Mix]:
    polygons = shaped_to_polygons(shaped)
    vertices = list(
        flatten(
            chain(polygon.border.vertices,
                  flatten(hole.vertices for hole in polygon.holes))
            for polygon in polygons))
    edges = list(flatten(polygon.edges for polygon in polygons))
    scales = strategies.integers(2, 100)
    rational_edges = strategies.sampled_from(edges).map(to_rational_segment)
    return (strategies.builds(Mix,
                              sub_lists(vertices, min_size=2).map(Multipoint),
                              empty_geometries, strategies.just(shaped))
            | strategies.builds(
                Mix, empty_geometries,
                strategies.builds(stretch_segment_end, rational_edges, scales),
                strategies.just(shaped))
            | strategies.builds(
                Mix, empty_geometries,
                strategies.builds(stretch_segment_start, rational_edges,
                                  scales), strategies.just(shaped))
            | strategies.builds(
                Mix, empty_geometries,
                strategies.sampled_from(polygons).map(to_polygon_diagonals),
                strategies.just(shaped))
            | strategies.builds(Mix, empty_geometries,
                                sub_lists(edges, min_size=2).map(Multisegment),
                                strategies.just(shaped)))
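
Every example on this page routes nested iterables through a flatten helper. A minimal sketch of such a helper, assuming (as the call sites above suggest) single-level flattening equivalent to itertools.chain.from_iterable; note that the pipeline-tree tests further down use their own tree-flattening variant that yields tree nodes instead:

from itertools import chain
from typing import Iterable, TypeVar

_T = TypeVar('_T')


def flatten(nested: Iterable[Iterable[_T]]) -> Iterable[_T]:
    # lazily flattens exactly one level of nesting
    return chain.from_iterable(nested)


assert list(flatten([[1, 2], [3]])) == [1, 2, 3]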
Example #2
def _to_initializer_body(
        signature_data: SignatureData,
        field_name_factory: Operator[str],
        instance_object_name: str = SELF_PARAMETER_NAME) -> List[ast.stmt]:
    if signature_data:

        def to_right_hand_value(parameter_name: str,
                                is_callable: bool) -> ast.expr:
            return (ast.Call(
                ast.Attribute(
                    _to_loaded_name(TYPES_MODULE_ALIAS), 'MethodType',
                    ast.Load()), [
                        ast.Lambda(_to_unit_signature(instance_object_name),
                                   _to_loaded_name(parameter_name)),
                        _to_loaded_name(instance_object_name)
                    ], []) if is_callable else _to_loaded_name(parameter_name))

        return ([
            ast.Import([ast.alias(TYPES_MODULE_NAME, TYPES_MODULE_ALIAS)])
        ] + [
            ast.Assign([
                ast.Attribute(ast.Name(instance_object_name, ast.Load()),
                              field_name_factory(parameter_name), ast.Store())
            ], to_right_hand_value(parameter_name, is_callable)) for
            parameter_name, _, is_callable in flatten(signature_data.values())
        ])
    else:
        return [ast.Pass()]
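
The statements built above can be checked by unparsing them; a minimal sketch with the hypothetical names 'self' and 'x' standing in for instance_object_name and a non-callable parameter (ast.unparse requires Python >= 3.9):

import ast

# one field assignment, shaped like the ast.Assign nodes built above
statement = ast.Assign(
    [ast.Attribute(ast.Name('self', ast.Load()), 'x', ast.Store())],
    ast.Name('x', ast.Load()))
module = ast.fix_missing_locations(ast.Module([statement], []))
print(ast.unparse(module))  # prints: self.x = x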
Example #3
def _to_custom_constructor_body(
    signature_data: SignatureData,
    class_parameter_name: str = CLASS_PARAMETER_NAME,
    positional_kinds: Sequence[inspect._ParameterKind] = (
        inspect._POSITIONAL_ONLY, inspect._POSITIONAL_OR_KEYWORD)
) -> List[ast.stmt]:
    positionals = flatten(
        signature_data.get(kind, []) for kind in positional_kinds)
    keywords = signature_data.get(inspect._KEYWORD_ONLY, [])
    positional_arguments = [
        _to_loaded_name(parameter_name) for parameter_name, *_ in positionals
    ]
    if inspect._VAR_POSITIONAL in signature_data:
        (variadic_positional_name,
         *_), = signature_data[inspect._VAR_POSITIONAL]
        variadic_positional_node = ast.Starred(
            _to_loaded_name(variadic_positional_name), ast.Load())
        positional_arguments.append(variadic_positional_node)
    keyword_arguments = [
        ast.keyword(name, _to_loaded_name(name))
        for name, _, is_callable in keywords
    ]
    if inspect._VAR_KEYWORD in signature_data:
        (variadic_keyword_name, *_), = signature_data[inspect._VAR_KEYWORD]
        variadic_keyword_node = ast.keyword(
            None, _to_loaded_name(variadic_keyword_name))
        keyword_arguments.append(variadic_keyword_node)
    return [
        ast.Return(
            ast.Call(_to_loaded_name(class_parameter_name),
                     positional_arguments, keyword_arguments))
    ]
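
The returned ast.Return node can be unparsed the same way to verify the variadic handling; a minimal sketch with hypothetical parameter names 'a', 'args' and 'kwargs' (Python >= 3.9):

import ast

# forwarding one positional, *args and **kwargs to the class parameter
returned = ast.Return(
    ast.Call(ast.Name('cls', ast.Load()),
             [ast.Name('a', ast.Load()),
              ast.Starred(ast.Name('args', ast.Load()), ast.Load())],
             [ast.keyword(None, ast.Name('kwargs', ast.Load()))]))
print(ast.unparse(ast.fix_missing_locations(returned)))
# prints: return cls(a, *args, **kwargs)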
Example #4
def to_names_with_signatures_data(
        signature_data: SignatureData
) -> Strategy[Tuple[str, SignatureData]]:
    used_names = frozenset(
        map(itemgetter(0), flatten(signature_data.values())))
    return strategies.tuples(
        names.filter(lambda name: name not in used_names),
        strategies.just(signature_data))
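
The used_names extraction above also works on plain dictionaries; a runnable sketch with a hypothetical mapping whose values are lists of triples with the parameter name first, matching the itemgetter(0) access:

from itertools import chain
from operator import itemgetter

signature_data = {'positional': [('x', None, False)],
                  'keyword': [('callback', None, True)]}
used_names = frozenset(
    map(itemgetter(0), chain.from_iterable(signature_data.values())))
print(used_names)  # frozenset({'x', 'callback'}); element order may vary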
Example #5
def test_mutate_any_node():
    # should be able to mutate every node (except 'root')
    sampler = mutate.get_random_mutation_sampler()
    rule_counter = Counter()
    for p in data.pipelines:
        t = pt.to_tree(p)
        t.annotate()
        flat_nodes = flatten(t)

        for n in flat_nodes:
            rules = sampler.sample_rules(n, return_proba=True)
            # always has one rule per node
            assert len(rules) == 1
            rule, prob = rules[0]
            rule_counter[type(rule)] += 1
            if pt.is_root_node(n):
                assert rule is None
                assert prob == 0.0
                continue

            if pt.is_composition_op(n):
                # can only insert for these
                if n.label.endswith("Pipeline"):
                    assert isinstance(rule, ComponentInsert)
                if n.label.endswith("FeatureUnion"):
                    assert isinstance(rule, (ComponentInsert, ComponentRemove))

            if pt.is_param_node(n):
                assert n.parent is not None
                parent_label = n.parent.label
                param = n.payload[0]
                if (parent_label not in sampler.config
                        or param not in sampler.config[parent_label]):
                    assert isinstance(rule, HyperparamRemove) or rule is None
                    if rule is None:
                        assert prob == 0.0
                    else:
                        assert prob > 0.0
                    continue

                if param == "score_func":
                    assert rule is None
                    assert prob == 0.0
                    continue

            if pt.is_component_node(n):
                if isinstance(rule, ComponentInsert):
                    assert pt.is_composition_node(n)

            # all other nodes should have a non-identity rule
            assert rule is not None
            assert prob > 0.0

    # should have sampled every rule type at least once
    # (only an unlucky random seed would violate this)
    for t in RULE_TYPES:
        assert rule_counter.get(t, 0) > 0
Example #6
def test_node_prob():
    sampler = mutate.get_random_mutation_sampler()
    # collect probabilities across all pipelines so the final assertion
    # checks every node, not just the last pipeline's
    all_probs = []
    for p in data.pipelines:
        t = pt.to_tree(p)
        flat_nodes = flatten(t)
        for n in flat_nodes:
            prob = sampler.get_probability(rule=None, node=n)
            assert 0.0 <= prob <= 1.0
            all_probs.append(prob)

    # would be terrible luck if this failed...
    assert max(all_probs) > 0.0
Example #7
hashables = (scalars
             | strings
             | to_homogeneous_frozensets(deferred_hashables)
             | to_homogeneous_tuples(deferred_hashables))
iterables = (strings
             | memory_views
             | to_homogeneous_sequences(deferred_objects))
sets = to_homogeneous_sets(hashables)
built_ins = vars(builtins).values()
built_in_callables = list(filter(callable, built_ins))
built_in_classes = [
    callable_ for callable_ in built_in_callables
    if isinstance(callable_, type)
]
built_in_classes_fields = list(
    flatten(vars(class_).values() for class_ in built_in_classes))


def round_trippable_built_in(object_: Any) -> bool:
    try:
        candidate = eval(serializers.complex_(object_),
                         {builtins.__name__: builtins})
    except Exception:
        return False
    else:
        return candidate is object_


objects = (hashables
           | iterables
           | sets)
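
The identity check in round_trippable_built_in (candidate is object_) relies on built-ins being singletons; a minimal sketch with a hand-written dotted path standing in for the serializers.complex_ output:

import builtins

candidate = eval('builtins.len', {builtins.__name__: builtins})
print(candidate is builtins.len)  # True: the very same object comes back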
Example #8
def _assert_fail(random_func, Serializer, kwargs):
    data = {key: val for key, val in kwargs.items() if val is not None}
    obj = next(flatten(random_func(1)))
    serializer = Serializer(obj, data=data, partial=True)
    valid = serializer.is_valid()
    assert not (valid and data)
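
The kwargs handling above drops None-valued entries before building the serializer, so None marks a field as absent rather than as data; in isolation:

kwargs = {'name': 'x', 'value': None}
data = {key: val for key, val in kwargs.items() if val is not None}
print(data)  # {'name': 'x'}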
Example #9
def test_beam_enumerator():
    p1 = Pipeline([("clf", LogisticRegression(penalty="l2"))])
    p2 = Pipeline([("clf", LogisticRegression(penalty="l1"))])

    _, ops = pt.tree_edit_distance(p1, p2, return_operations=True)
    update_op = [o for o in ops if is_update_edit(o)][0]
    # rule 1: penalty=l2 -> penalty=l1
    r1 = HyperparamUpdate(update_op)

    p5 = Pipeline([("clf", LogisticRegression(penalty="elasticnet"))])
    _, ops = pt.tree_edit_distance(p1, p5, return_operations=True)
    update_op = [o for o in ops if is_update_edit(o)][0]
    # rule 1.5: penalty=l2 -> penalty=elasticnet
    r1_5 = HyperparamUpdate(update_op)

    p3 = Pipeline([("s0", MinMaxScaler()),
                   ("clf", LogisticRegression(penalty="l2"))])
    _, ops = pt.tree_edit_distance(p3, p1, return_operations=True)
    remove_op = [o for o in ops if is_remove_edit(o)][0]

    # rule 2: remove MinMaxScaler
    r2 = ComponentRemove(remove_op)

    p4 = Pipeline([("s0", StandardScaler()),
                   ("clf", LogisticRegression(penalty="l2"))])

    _, ops = pt.tree_edit_distance(p1, p4, return_operations=True)
    insert_op = [o for o in ops if is_insert_edit(o)][0]
    augedit = AugmentedEdit(insert_op, get_parent_match_edit(insert_op, ops))
    # rule 3: insert StandardScaler
    r3 = ComponentInsert(augedit)

    n1 = r1.pre
    n2 = r2.pre
    n3 = r3.pre
    rules = {
        get_node_key(n1): [r1, r1_5],
        get_node_key(n2): [r2],
        get_node_key(n3): [r3],
    }
    node_probs = {
        get_node_key(n1): 0.5,
        get_node_key(n2): 0.2,
        get_node_key(n3): 0.3,
    }
    cond_probs = {
        r1: 0.7,
        r1_5: 0.15,
        r2: 0.3,
        r3: 0.1,
    }

    rule_sampler = FakeRuleSampler(rules, node_probs, cond_probs)

    rs = rule_sampler.sample_rules(n1, return_proba=True)
    assert len(rs) == 2

    enumerator = tree_enumerator.RepeatedBeamSearchEnumerator(
        rule_sampler,
        force_apply=False,
    )

    # should sort rules from max to min probability
    t1 = pt.to_tree(p1).annotate()
    opt_rules = enumerator.collect_node_rule_probs(t1, past_rules=[], acc={})
    flat_nodes = flatten(t1)
    assert len(opt_rules) == len(flat_nodes)

    # rule 1 is best for that node
    target_n = next(n for n in flat_nodes if n.label == r1.pre.label)
    opt_rule_and_prob = opt_rules[target_n]
    assert opt_rule_and_prob[0] == r1
    # we normalize the conditional probabilities to those that
    # can be applied:
    norm_cond_prob = cond_probs[r1] / (cond_probs[r1] + cond_probs[r1_5])
    expected_prob = node_probs[get_node_key(target_n)] * norm_cond_prob
    assert abs(opt_rule_and_prob[1] - expected_prob) < 1e-5

    # if we collect optimal node/rules again after using r1, we should get r1_5
    # for that node
    opt_rules = enumerator.collect_node_rule_probs(t1, past_rules=[r1], acc={})
    opt_rule_and_prob = opt_rules[target_n]
    assert opt_rule_and_prob[0] == r1_5
    # we normalize the conditional probabilities to those that
    # can be applied:
    norm_cond_prob = cond_probs[r1_5] / (cond_probs[r1] + cond_probs[r1_5])
    expected_prob = node_probs[get_node_key(target_n)] * norm_cond_prob
    assert abs(opt_rule_and_prob[1] - expected_prob) < 1e-5

    new_trees, lineage = list(
        enumerator.derive_variant_trees(t1, k=5, past_rules=[]))
    # at most 2 (even though k = 5)
    # penalty=l2->l1, insert(StandardScaler)
    assert len(new_trees) == 2
    assert list(lineage[0])[0] == r1
    assert list(lineage[1])[0] == r3

    gen = enumerator.enumerate(p1, k=5)
    trees_t1 = list(gen)
    # l2->l1, insert(StandardScaler), l2->elastic
    assert len(trees_t1) == 3

    gen = enumerator.enumerate(p5, k=5)
    trees_t2 = list(gen)
    # insert(StandardScaler)
    assert len(trees_t2) == 1

    gen = enumerator.enumerate(p3, k=10)
    trees_t3 = list(gen)
    # (Overall possible rules): outcome pipeline
    # l2 -> l1:                            MinMax, LR(l1)           (yes)
    # l2 -> elastic:                       MinMax, LR(elastic)
    # insert(StandardScaler):              MinMax, SS, LR(l2)       (yes)
    # remove(MinMaxScaler):                LR(L2)                   (yes)
    # insert(SS), l2->l1:                  MinMax, SS, LR(l1)       (yes)
    # insert(SS), l2->elastic:             MinMax, SS, LR(elastic)
    # remove(MM), l2->l1:                  LR(l1)                   (yes)
    # remove(MM), l2->elastic:             LR(elastic)
    # remove(MM), insert(SS)               SS, LR(l2)               (yes)
    # remove(MM), insert(SS), l2->l1:      SS, LR(l1)               (yes)
    # remove(MM), insert(SS), l2->elastic: SS, LR(elastic)
    assert len(trees_t3) < 11
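
The expected probabilities asserted above multiply a node's probability by its rule's conditional probability, normalized over the rules still applicable at that node; the arithmetic for the first assertion, in isolation:

node_prob = 0.5                  # node_probs entry for r1's node
cond_r1, cond_r1_5 = 0.7, 0.15   # cond_probs for the two candidate rules
norm_cond_prob = cond_r1 / (cond_r1 + cond_r1_5)
print(round(node_prob * norm_cond_prob, 6))  # 0.411765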