Ejemplo n.º 1
0
 def can_build_rule(edit):
     """Return whether ``edit`` qualifies for building this rule.

     The edit must satisfy ``is_update_edit``, both of its arguments must be
     parameter nodes that each have a parent, and the two parents must carry
     the same label. The checks short-circuit in that order, so a parent is
     only dereferenced after it has been confirmed to be non-``None``.
     """
     return (
         is_update_edit(edit)
         and pt.is_param_node(edit.arg1)
         and pt.is_param_node(edit.arg2)
         and edit.arg1.parent is not None
         and edit.arg2.parent is not None
         and edit.arg1.parent.label == edit.arg2.parent.label
     )
Ejemplo n.º 2
0
def get_node_key(n):
    """Return the lookup key for node ``n``.

    Component nodes are keyed by their label and parameter nodes by the
    first element of their payload. Any other node yields ``None``.
    """
    if pt.is_component_node(n):
        return n.label
    if pt.is_param_node(n):
        return n.payload[0]
    return None
Ejemplo n.º 3
0
    def can_apply(self, node):
        """Return whether this rule can be applied to ``node``.

        ``node`` must be a parameter node whose payload name matches the
        rule's ``pre`` payload name and whose parent label (via
        ``get_safe_label``) matches the label of ``pre``'s parent. The rule
        is rejected when ``node`` already holds the ``post`` value. When the
        ``pre`` value is a string, the node's value must equal it; any other
        ``pre`` value type is accepted unconditionally.
        """
        if not pt.is_param_node(node):
            return False
        if node.payload[0] != self.pre.payload[0]:
            return False

        # hyperparameters depend on the parent component,
        # so a mismatched parent means the rule can't really apply
        if self.pre.parent.label != get_safe_label(node.parent):
            return False

        expected_value = self.pre.payload[1]
        replacement_value = self.post.payload[1]
        current_value = node.payload[1]

        if replacement_value == current_value:
            # applying would be a no-op, so don't bother
            return False

        # TODO: for numeric we could add predicate or some form of abstraction
        # could be learned...
        if not isinstance(expected_value, str):
            return True

        # string hypervalues (most likely representing enumerations)
        # apply only on an exact match
        return expected_value == current_value
Ejemplo n.º 4
0
 def get_random_mutation(self, node):
     """Dispatch ``node`` to the mutation routine matching its kind.

     Parameter nodes go to ``get_hyperparam_mutation``; component or
     composition nodes go to ``get_component_mutation``. Any other node
     yields ``None``.
     """
     if pt.is_param_node(node):
         return self.get_hyperparam_mutation(node)

     targets_component = (
         pt.is_component_node(node) or pt.is_composition_node(node)
     )
     if targets_component:
         return self.get_component_mutation(node)

     return None
Ejemplo n.º 5
0
def test_mutate_any_node():
    """Check the rule sampled for every node of every pipeline in ``data.pipelines``.

    For each node exactly one ``(rule, probability)`` pair is expected. Root
    nodes, ``score_func`` parameters and parameters missing from the sampler
    config have dedicated expectations; every other node must get a real
    rule with positive probability. At the end, every type in ``RULE_TYPES``
    must have been sampled at least once.
    """
    # should be able to mutate every node (except 'root')
    sampler = mutate.get_random_mutation_sampler()
    seen_rule_types = Counter()
    for pipeline in data.pipelines:
        tree = pt.to_tree(pipeline)
        tree.annotate()

        for node in flatten(tree):
            sampled = sampler.sample_rules(node, return_proba=True)
            # always has one rule per node
            assert len(sampled) == 1
            rule, prob = sampled[0]
            seen_rule_types[type(rule)] += 1

            if pt.is_root_node(node):
                assert rule is None
                assert prob == 0.0
                continue

            if pt.is_composition_op(node):
                # can only insert for these
                if node.label.endswith("Pipeline"):
                    assert isinstance(rule, ComponentInsert)
                if node.label.endswith("FeatureUnion"):
                    assert isinstance(rule, (ComponentInsert, ComponentRemove))

            if pt.is_param_node(node):
                assert node.parent is not None
                parent_label = node.parent.label
                param_name = node.payload[0]
                is_configured = (
                    parent_label in sampler.config
                    and param_name in sampler.config[parent_label]
                )
                if not is_configured:
                    # unconfigured params may only be removed or left alone
                    if rule is None:
                        assert prob == 0.0
                    else:
                        assert isinstance(rule, HyperparamRemove)
                        assert prob > 0.0
                    continue

                if param_name == "score_func":
                    assert rule is None
                    assert prob == 0.0
                    continue

            if pt.is_component_node(node) and isinstance(rule, ComponentInsert):
                assert pt.is_composition_node(node)

            # all others should have non-identity rule
            assert rule is not None
            assert prob > 0.0

    # should have all rule types (bad luck if don't....)
    for rule_type in RULE_TYPES:
        assert seen_rule_types.get(rule_type, 0.0) > 0
0
    def get_probability(self, rule=None, node=None):
        """Return a probability for ``node`` alone or for ``rule`` on ``node``.

        At least one of ``rule`` and ``node`` must be given. With only a
        node, the value stored in ``self.pre_probs`` under the node's key
        is returned (``0.0`` when absent). Otherwise the call is delegated
        to the component or hyperparameter rule predictor depending on the
        node kind; a node of neither kind yields ``None``.
        """
        assert rule is not None or node is not None
        if node is not None and rule is None:
            # prior probs should account for both types of nodes
            priors = self.pre_probs
            return priors.get(get_node_key(node), 0.0)

        if pt.is_component_node(node):
            predictor = self.component_rule_predictor
        elif pt.is_param_node(node):
            predictor = self.hyperparam_rule_predictor
        else:
            return None
        return predictor.get_probability(rule, node)
Ejemplo n.º 7
0
 def sample_rules(self, n, return_proba=False, random_state=None):
     """Collect candidate rules for node ``n``.

     The result always starts with the identity entry ``None``, followed by
     the predictions of the component and/or hyperparameter rule predictor,
     depending on the node kind. When ``return_proba`` is true, the identity
     entry becomes the pair ``(None, DEFAULT_SMALL_PROB)``.

     ``random_state`` is accepted but not used in this method.
     """
     rules = [None]
     if pt.is_component_node(n):
         component_rules = self.component_rule_predictor.predict(
             n, return_proba=return_proba)
         rules += component_rules
     if pt.is_param_node(n):
         hyperparam_rules = self.hyperparam_rule_predictor.predict(
             n, return_proba=return_proba)
         rules += hyperparam_rules

     if not return_proba:
         return rules

     # pair the identity entry with a small probability
     rules[0] = (rules[0], DEFAULT_SMALL_PROB)
     return rules
Ejemplo n.º 8
0
    def can_apply(self, node):
        """Return whether this rule can be applied to ``node``.

        ``node`` must be a parameter node whose payload name matches the
        rule's ``pre`` payload name and whose parent label (via
        ``get_safe_label``) matches the label of ``pre``'s parent. When the
        ``pre`` value is a string, the node's value must equal it; any other
        ``pre`` value type is accepted unconditionally.
        """
        if not pt.is_param_node(node):
            return False
        if node.payload[0] != self.pre.payload[0]:
            return False

        # requires component context to effectively apply
        if self.pre.parent.label != get_safe_label(node.parent):
            return False

        expected_value = self.pre.payload[1]
        current_value = node.payload[1]

        # TODO: for numeric we could add predicate or some form of abstraction
        # could be learned...
        if not isinstance(expected_value, str):
            return True

        # string hypervalues (most likely representing enumerations)
        # apply only on an exact match
        return expected_value == current_value
Ejemplo n.º 9
0
 def info_from_node(node):
     """Build a feature dictionary describing ``node`` and its siblings.

     The dictionary always holds the parent's label (via ``get_safe_label``),
     the first payload element under ``"hyperparam"``, the featurized second
     payload element under ``"hypervalue"``, and the string form of that
     value's type under ``"hypervalue_type"``. Each sibling then contributes:

     * a parameter sibling: ``sibling_param_name_<name>`` set to ``True`` and
       ``sibling_param_value_<name>`` set to its featurized value;
     * a component sibling: ``sibling_component_<label>`` set to ``True``.

     Siblings of any other kind are ignored. Featurizing a sibling is
     best-effort: one that raises is skipped, keeping whatever keys were
     written for it before the failure.
     """
     info = {
         "parent": get_safe_label(node.parent),
         "hyperparam": node.payload[0],
         "hypervalue": value_as_feature(node.payload[1]),
         "hypervalue_type": str(type(node.payload[1])),
     }
     for sibling in node.siblings():
         try:
             if pt.is_param_node(sibling):
                 name, val = sibling.payload
                 info["sibling_param_name_" + name] = True
                 info["sibling_param_value_" + name] = value_as_feature(val)
             elif pt.is_component_node(sibling):
                 info["sibling_component_" + sibling.label] = True
         except Exception:
             # Skip siblings that cannot be featurized. Catch Exception rather
             # than using a bare except so KeyboardInterrupt and SystemExit
             # still propagate.
             continue
     return info
Ejemplo n.º 10
0
    def recursive_enumerate(self, node, rules_per_node=3):
        """Lazily yield trees obtained by applying sampled rules under ``node``.

        Rules come from ``self.rule_sampler.sample_rules(node)``. A ``None``
        rule keeps the node unchanged and does not count toward
        ``rules_per_node``; every other applicable rule is applied and
        counted, and iteration stops once ``rules_per_node`` rules have been
        applied. Rules that cannot be applied are skipped.

        If the resulting node is ``None`` or a parameter node it is yielded
        as is. Otherwise each child is enumerated recursively and one new
        tree is yielded (via ``replace_children``) for every combination of
        child subtrees.
        """
        applied = 0
        for rule in self.rule_sampler.sample_rules(node):
            if applied >= rules_per_node:
                break
            if rule is None:
                new_node = node
            elif rule.can_apply(node):
                new_node = rule.apply(node)
                applied += 1
            else:
                continue

            # Test for None first so the node predicate is never handed None.
            if new_node is None or pt.is_param_node(new_node):
                yield new_node
                continue

            # one generator of candidate subtrees per child of the
            # transformed node, so the children stay valid for it
            subtree_gens = [
                self.recursive_enumerate(child, rules_per_node)
                for child in new_node.children
            ]

            # cartesian product over subtrees rooted at each child:
            # each combination constitutes a new tree rooted at `new_node`
            for subtrees in itertools.product(*subtree_gens):
                yield replace_children(new_node, subtrees)
Ejemplo n.º 11
0
 def can_build_rule(edit):
     """Return whether ``edit`` qualifies for building this rule.

     The edit must satisfy ``is_remove_edit``, its first argument must be a
     parameter node with a parent, and its second argument must be ``None``.
     The checks short-circuit in that order.
     """
     return (
         is_remove_edit(edit)
         and pt.is_param_node(edit.arg1)
         and edit.arg1.parent is not None
         and edit.arg2 is None
     )