Example #1
0
    def can_build_rule(edit):
        """
        Decide whether a rule can be built from ``edit``.

        The edit qualifies when ``is_update_edit`` accepts it, both of its
        arguments (``edit.arg1`` and ``edit.arg2``) are component nodes, and
        ``edit.arg1`` has a parent. In addition, ``pt.to_pipeline`` must be
        able to process ``edit.arg2`` without raising.

        Returns True if every check passes, False otherwise.
        """
        base_cond = is_update_edit(edit) and \
            pt.is_component_node(edit.arg1) and \
            pt.is_component_node(edit.arg2) and \
            edit.arg1.parent is not None

        if not base_cond:
            return False
        # want to also try compiling the post on its own
        try:
            pt.to_pipeline(edit.arg2)
        except Exception:
            # Any ordinary failure means the post node is unusable. Catching
            # Exception (rather than a bare except) lets KeyboardInterrupt
            # and SystemExit propagate instead of being reported as False.
            return False
        return True
Example #2
0
def get_node_key(n):
    """
    Return the lookup key for node ``n``.

    Component nodes are keyed by their label, parameter nodes by the first
    element of their payload. Any other node has no key (None).
    """
    if pt.is_component_node(n):
        return n.label
    if pt.is_param_node(n):
        return n.payload[0]
    return None
Example #3
0
    def can_apply(self, node):
        """
        Tell whether this rule is applicable to ``node``.

        ``node`` must be a component node carrying the same label as
        ``self.pre``, and at least one direct neighbor (parent, left or
        right) must have the same label as the corresponding neighbor of
        ``self.pre``.
        """
        if not pt.is_component_node(node):
            return False
        if node.label != self.pre.label:
            return False

        # one matching neighbor is enough; checked in parent/left/right order
        if self.pre.parent.label == get_safe_label(node.parent):
            return True
        if get_safe_label(self.pre.left) == get_safe_label(node.left):
            return True
        return get_safe_label(self.pre.right) == get_safe_label(node.right)
Example #4
0
 def get_random_mutation(self, node):
     """
     Dispatch ``node`` to the matching mutation generator.

     Parameter nodes go to ``get_hyperparam_mutation``; component and
     composition nodes go to ``get_component_mutation``. Other nodes
     yield None.
     """
     if pt.is_param_node(node):
         return self.get_hyperparam_mutation(node)
     if pt.is_component_node(node) or pt.is_composition_node(node):
         return self.get_component_mutation(node)
     return None
Example #5
0
 def info_from_node(node):
     """
     Build a feature dict describing ``node``.

     The dict holds the (safe) label of the parent under "parent", the
     node's own label under "component", and a True entry keyed by the
     label of every sibling that is a component node.
     """
     info = {"parent": get_safe_label(node.parent), "component": node.label}
     # flag each component sibling by its label
     info.update(
         (sib.label, True)
         for sib in node.siblings()
         if pt.is_component_node(sib))
     return info
Example #6
0
def test_mutate_any_node():
    """
    Check the random mutation sampler against every node of every pipeline
    in ``data.pipelines``.

    For each node exactly one (rule, probability) pair is expected from
    ``sample_rules``. Depending on the node kind, the test asserts which
    rule types are acceptable and whether the probability must be zero or
    strictly positive. At the end, every type listed in ``RULE_TYPES`` must
    have been produced at least once across all nodes.
    """
    # should be able to mutate every node (except 'root')
    sampler = mutate.get_random_mutation_sampler()
    # counts how often each rule type (including NoneType) was sampled
    rule_counter = Counter()
    for p in data.pipelines:
        t = pt.to_tree(p)
        t.annotate()
        flat_nodes = flatten(t)

        for n in flat_nodes:
            rules = sampler.sample_rules(n, return_proba=True)
            # always has one rule per node
            assert len(rules) == 1
            rule, prob = rules[0]
            # counted before any early `continue`, so skipped nodes are
            # included in the tally as well
            rule_counter[type(rule)] += 1
            if pt.is_root_node(n):
                # root is never mutated: identity rule with zero probability
                assert rule is None
                assert prob == 0.0
                continue

            if pt.is_composition_op(n):
                # can only insert for these
                if n.label.endswith("Pipeline"):
                    assert isinstance(rule, ComponentInsert)
                # labels ending in FeatureUnion may also get a removal
                if n.label.endswith("FeatureUnion"):
                    assert isinstance(rule, (ComponentInsert, ComponentRemove))

            if pt.is_param_node(n):
                assert n.parent is not None
                parent_label = n.parent.label
                param = n.payload[0]
                # parent component or parameter name absent from the sampler
                # config: only removal or the identity rule is acceptable
                if parent_label not in sampler.config or param not in sampler.config[
                        parent_label]:
                    assert isinstance(rule, HyperparamRemove) or rule is None
                    if rule is None:
                        assert prob == 0.0
                    else:
                        assert prob > 0.0
                    continue

                # "score_func" is expected to always get the identity rule
                if param == "score_func":
                    assert rule is None
                    assert prob == 0.0
                    continue

            if pt.is_component_node(n):
                # an insert rule is only acceptable on composition nodes
                if isinstance(rule, ComponentInsert):
                    assert pt.is_composition_node(n)

            # all others should have non-identity rule
            assert rule is not None
            assert prob > 0.0

    # should have all rule types (bad luck if don't....)
    # NOTE(review): relies on the sampled rules covering every type, so this
    # may be flaky if sampling is not seeded -- confirm.
    for t in RULE_TYPES:
        assert rule_counter.get(t, 0.0) > 0
Example #7
0
    def get_probability(self, rule=None, node=None):
        """
        Return a probability for ``node`` alone or for ``rule`` on ``node``.

        With only ``node`` given, the value stored in ``self.pre_probs``
        under the node's key is returned (0.0 when missing). Otherwise the
        request is delegated to the component or hyperparameter rule
        predictor, depending on the node kind. Nodes of neither kind give
        None. At least one of ``rule`` and ``node`` must be provided.
        """
        assert not (rule is None and node is None)
        if node is not None and rule is None:
            # prior probs should account for both types of nodes
            node_key = get_node_key(node)
            return self.pre_probs.get(node_key, 0.0)

        if pt.is_component_node(node):
            predictor = self.component_rule_predictor
        elif pt.is_param_node(node):
            predictor = self.hyperparam_rule_predictor
        else:
            # neither a component nor a parameter node
            return None
        return predictor.get_probability(rule, node)
Example #8
0
    def can_apply(self, node):
        """
        Tell whether this rule is applicable to ``node``.

        ``node`` must be a component node labelled like ``self.pre``, the
        rule must actually change the label (``self.post`` differs), and at
        least one direct neighbor (parent, left or right) must carry the
        same label as the corresponding neighbor of ``self.pre``.
        """
        if not pt.is_component_node(node):
            return False
        if node.label != self.pre.label:
            return False

        # applying the rule would leave the label unchanged: no-op
        if node.label == self.post.label:
            return False

        # at least one direct neighbor matches
        if self.pre.parent.label == get_safe_label(node.parent):
            return True
        if get_safe_label(self.pre.left) == get_safe_label(node.left):
            return True
        return get_safe_label(self.pre.right) == get_safe_label(node.right)
def must_delete_subtree(tree, new_children):
    """
    Report whether ``tree`` has to be dropped because it ended up empty.

    Only the root node and a fixed set of combinator components become
    no-ops (or invalid) without children; for those, the subtree must be
    deleted when every entry of ``new_children`` is None.
    """
    del_if_empty_combinators = (
        "sklearn.pipeline.Pipeline",
        "tpot.builtins.stacking_estimator.StackingEstimator",
    )
    # children are only inspected for nodes that need them
    needs_children = pt.is_root_node(tree) or (
        pt.is_component_node(tree)
        and tree.label in del_if_empty_combinators)
    return needs_children and all(
        child is None for child in new_children)
Example #10
0
 def sample_rules(self, n, return_proba=False, random_state=None):
     """
     Collect candidate rules for node ``n``.

     The result always starts with the identity rule (None), followed by
     whatever the component and/or hyperparameter predictors return for
     the node. With ``return_proba`` set, the identity entry becomes a
     ``(None, DEFAULT_SMALL_PROB)`` pair and ``return_proba`` is forwarded
     to the predictors. ``random_state`` is accepted but not used here.
     """
     predicted = []
     if pt.is_component_node(n):
         predicted += self.component_rule_predictor.predict(
             n, return_proba=return_proba)
     if pt.is_param_node(n):
         predicted += self.hyperparam_rule_predictor.predict(
             n, return_proba=return_proba)

     # set small prob to None
     identity = (None, DEFAULT_SMALL_PROB) if return_proba else None
     return [identity] + predicted
Example #11
0
 def info_from_node(node):
     """
     Build a feature dict describing the parameter node ``node``.

     The dict records the (safe) label of the parent, the hyperparameter
     name (``node.payload[0]``), its value passed through
     ``value_as_feature`` and the string form of the value's type. Each
     sibling adds further entries: parameter siblings contribute their name
     and featurized value, component siblings contribute a flag keyed by
     their label. A sibling that fails to process is skipped (best effort).
     """
     info = {
         "parent": get_safe_label(node.parent),
         "hyperparam": node.payload[0],
         "hypervalue": value_as_feature(node.payload[1]),
         "hypervalue_type": str(type(node.payload[1])),
     }
     for c in node.siblings():
         try:
             if pt.is_param_node(c):
                 name, val = c.payload
                 info["sibling_param_name_" + name] = True
                 info["sibling_param_value_" + name] = value_as_feature(val)
             elif pt.is_component_node(c):
                 info["sibling_component_" + c.label] = True
         except Exception:
             # Sibling features are best effort: a malformed sibling is
             # ignored. Catching Exception (not a bare except) keeps
             # KeyboardInterrupt and SystemExit from being swallowed.
             pass
     return info
Example #12
0
 def can_build_rule(edit):
     """
     Decide whether a rule can be built from ``edit``.

     Requires an edit accepted by ``is_remove_edit`` whose first argument
     is a component node with a parent and whose second argument is None.
     """
     return (
         is_remove_edit(edit)
         and pt.is_component_node(edit.arg1)
         and edit.arg2 is None
         and edit.arg1.parent is not None
     )