def can_build_rule(edit):
    """Tell whether ``edit`` qualifies as the basis for an update rule.

    The result is truthy only when every one of these holds, checked in
    order with short-circuiting:

    * ``is_update_edit(edit)`` is truthy,
    * both ``edit.arg1`` and ``edit.arg2`` are parameter nodes,
    * both nodes have a parent (``parent is not None``),
    * the two parents carry the same ``label``.

    Returns the first falsy operand of the chain, or the result of the final
    label comparison.
    """
    return (
        is_update_edit(edit)
        and pt.is_param_node(edit.arg1)
        and pt.is_param_node(edit.arg2)
        and edit.arg1.parent is not None
        and edit.arg2.parent is not None
        and edit.arg1.parent.label == edit.arg2.parent.label
    )
def get_node_key(n):
    """Return the lookup key for node ``n``.

    Component nodes are keyed by their ``label``; parameter nodes are keyed
    by the first element of their ``payload``. Any other node yields
    ``None``.
    """
    if pt.is_component_node(n):
        return n.label
    if pt.is_param_node(n):
        return n.payload[0]
    return None
def can_apply(self, node):
    """Decide whether this rule may be applied to ``node``.

    ``self.pre`` and ``self.post`` are the rule's before/after parameter
    nodes; their ``payload`` is indexed as ``[0]`` (compared against the
    candidate's ``payload[0]``) and ``[1]`` (the hypervalue).

    Returns ``False`` when:

    * ``node`` is not a parameter node, or its ``payload[0]`` differs from
      the rule's;
    * the label of the rule's parent differs from
      ``get_safe_label(node.parent)``;
    * the candidate already holds the rule's target hypervalue (no-op).

    Otherwise, for a string source hypervalue the candidate must hold the
    same value; for any other type the rule always applies.
    """
    if not pt.is_param_node(node):
        return False
    if node.payload[0] != self.pre.payload[0]:
        return False

    # hyperparameters depend on the parent component, so a rule learned
    # under one component can't really be applied under a different one
    if self.pre.parent.label != get_safe_label(node.parent):
        return False

    source_value = self.pre.payload[1]
    target_value = self.post.payload[1]
    current_value = node.payload[1]

    # applying the rule would leave the node unchanged: don't bother
    if target_value == current_value:
        return False

    # TODO: for numeric we could add predicate or some form of abstraction
    # could be learned...
    if not isinstance(source_value, str):
        return True

    # string hypervalues (most likely representing enumerations) must match
    # the candidate's current value exactly
    return source_value == current_value
def get_random_mutation(self, node):
    """Dispatch ``node`` to the mutation helper matching its kind.

    Parameter nodes go to ``self.get_hyperparam_mutation``; component or
    composition nodes go to ``self.get_component_mutation``. Any other node
    yields ``None``.
    """
    if pt.is_param_node(node):
        return self.get_hyperparam_mutation(node)
    if pt.is_component_node(node) or pt.is_composition_node(node):
        return self.get_component_mutation(node)
    return None
def test_mutate_any_node():
    """Check the random mutation sampler over every node of every pipeline.

    Every node except the root should be mutable. For each node the sampler
    must return exactly one ``(rule, probability)`` pair, and the rule type
    must be consistent with the kind of node it was sampled for. After the
    sweep, every type in ``RULE_TYPES`` must have been seen at least once.
    """
    sampler = mutate.get_random_mutation_sampler()
    seen_rule_types = Counter()

    for pipeline in data.pipelines:
        tree = pt.to_tree(pipeline)
        tree.annotate()

        for node in flatten(tree):
            sampled = sampler.sample_rules(node, return_proba=True)
            # always has one rule per node
            assert len(sampled) == 1
            rule, prob = sampled[0]
            seen_rule_types[type(rule)] += 1

            # the root is never mutated
            if pt.is_root_node(node):
                assert rule is None
                assert prob == 0.0
                continue

            if pt.is_composition_op(node):
                # can only insert for these
                if node.label.endswith("Pipeline"):
                    assert isinstance(rule, ComponentInsert)
                if node.label.endswith("FeatureUnion"):
                    assert isinstance(rule, (ComponentInsert, ComponentRemove))

            if pt.is_param_node(node):
                assert node.parent is not None
                parent_label = node.parent.label
                param = node.payload[0]

                # parameters missing from the sampler config can only be
                # removed, or left alone with zero probability
                if (
                    parent_label not in sampler.config
                    or param not in sampler.config[parent_label]
                ):
                    assert isinstance(rule, HyperparamRemove) or rule is None
                    if rule is None:
                        assert prob == 0.0
                    else:
                        assert prob > 0.0
                    continue

                # score_func is expected to get no rule at all
                if param == "score_func":
                    assert rule is None
                    assert prob == 0.0
                    continue

            if pt.is_component_node(node) and isinstance(rule, ComponentInsert):
                assert pt.is_composition_node(node)

            # all others should have non-identity rule
            assert rule is not None
            assert prob > 0.0

    # should have all rule types (bad luck if don't....)
    for rule_type in RULE_TYPES:
        assert seen_rule_types.get(rule_type, 0.0) > 0
def get_probability(self, rule=None, node=None):
    """Look up a probability for a node alone or for a rule at a node.

    At least one of ``rule`` / ``node`` must be given (enforced with an
    ``assert``).

    * Only ``node`` given: return the prior stored in ``self.pre_probs``
      under ``get_node_key(node)``, defaulting to ``0.0``.
    * Otherwise: delegate to ``self.component_rule_predictor`` for component
      nodes or ``self.hyperparam_rule_predictor`` for parameter nodes.

    Returns ``None`` when a rule is given but ``node`` is neither a
    component node nor a parameter node.
    """
    assert rule is not None or node is not None

    if node is not None and rule is None:
        # prior probs should account for both types of nodes
        return self.pre_probs.get(get_node_key(node), 0.0)

    if pt.is_component_node(node):
        predictor = self.component_rule_predictor
    elif pt.is_param_node(node):
        predictor = self.hyperparam_rule_predictor
    else:
        return None
    return predictor.get_probability(rule, node)
def sample_rules(self, n, return_proba=False, random_state=None):
    """Collect the candidate rules for node ``n``.

    The returned list always starts with the identity rule (``None``),
    followed by whatever the component and/or hyperparameter predictors
    return for ``n``. When ``return_proba`` is true the flag is forwarded to
    the predictors and the identity entry becomes the pair
    ``(None, DEFAULT_SMALL_PROB)``.

    ``random_state`` is accepted but not used by this function.
    """
    candidates = [None]

    if pt.is_component_node(n):
        candidates += self.component_rule_predictor.predict(
            n, return_proba=return_proba
        )
    if pt.is_param_node(n):
        candidates += self.hyperparam_rule_predictor.predict(
            n, return_proba=return_proba
        )

    if return_proba:
        # give the identity rule a small probability
        candidates[0] = (None, DEFAULT_SMALL_PROB)
    return candidates
def can_apply(self, node):
    """Decide whether this rule may be applied to ``node``.

    ``self.pre`` is the rule's source parameter node. The rule applies only
    to a parameter node whose ``payload[0]`` equals the rule's and whose
    parent label (via ``get_safe_label``) equals the label of the rule's
    parent. If the rule's hypervalue (``payload[1]``) is a string, the
    candidate must hold that same value; for any other type the rule always
    applies.
    """
    if not pt.is_param_node(node):
        return False
    if node.payload[0] != self.pre.payload[0]:
        return False

    # requires component context to effectively apply
    if self.pre.parent.label != get_safe_label(node.parent):
        return False

    rule_value = self.pre.payload[1]
    node_value = node.payload[1]

    # TODO: for numeric we could add predicate or some form of abstraction
    # could be learned...
    if not isinstance(rule_value, str):
        return True

    # string hypervalues (most likely representing enumerations) apply only
    # on an exact match
    return rule_value == node_value
def info_from_node(node):
    """Build a flat feature dictionary describing ``node`` and its siblings.

    The dictionary always contains:

    * ``"parent"``: ``get_safe_label(node.parent)``
    * ``"hyperparam"``: ``node.payload[0]``
    * ``"hypervalue"``: ``value_as_feature(node.payload[1])``
    * ``"hypervalue_type"``: ``str(type(node.payload[1]))``

    For each sibling returned by ``node.siblings()``:

    * a parameter sibling with payload ``(name, val)`` adds
      ``"sibling_param_name_<name>" = True`` and
      ``"sibling_param_value_<name>" = value_as_feature(val)``;
    * a component sibling adds ``"sibling_component_<label>" = True``;
    * any other sibling is ignored.

    Sibling features are best-effort: a sibling that raises while being
    processed is skipped, keeping any entries already written for it.
    """
    info = {
        "parent": get_safe_label(node.parent),
        "hyperparam": node.payload[0],
        "hypervalue": value_as_feature(node.payload[1]),
        "hypervalue_type": str(type(node.payload[1])),
    }
    for sibling in node.siblings():
        try:
            if pt.is_param_node(sibling):
                name, val = sibling.payload
                info["sibling_param_name_" + name] = True
                info["sibling_param_value_" + name] = value_as_feature(val)
            elif pt.is_component_node(sibling):
                info["sibling_component_" + sibling.label] = True
        except Exception:
            # Deliberately best-effort, but only for ordinary errors: a bare
            # ``except`` would also swallow KeyboardInterrupt / SystemExit.
            continue
    return info
def recursive_enumerate(self, node, rules_per_node=3):
    """Lazily enumerate trees obtained by applying sampled rules at ``node``.

    Rules come from ``self.rule_sampler.sample_rules(node)`` and are tried in
    order. A ``None`` rule is the identity (``node`` is kept as-is) and does
    not count towards ``rules_per_node``; every other rule counts once it is
    actually applied. Enumeration at this node stops as soon as
    ``rules_per_node`` rules have been applied.

    For each resulting node:

    * if it is ``None`` or a parameter node, it is yielded directly;
    * otherwise each child is enumerated recursively and one tree is yielded
      per element of the cartesian product of the children's alternatives,
      built with ``replace_children(new_node, subtrees)``.

    Args:
        node: the node to transform.
        rules_per_node: maximum number of non-identity rules applied at each
            node.

    Yields:
        The transformed nodes / trees, possibly ``None``.
    """
    applied = 0
    for rule in self.rule_sampler.sample_rules(node):
        if applied >= rules_per_node:
            break

        if rule is None:
            # identity rule: keep the node untouched
            new_node = node
        elif rule.can_apply(node):
            new_node = rule.apply(node)
            applied += 1
        else:
            continue

        # Test for ``None`` before handing the node to ``pt.is_param_node``
        # so that predicate is never called on a missing node.
        if new_node is None or pt.is_param_node(new_node):
            yield new_node
            continue

        # modify children of the transformed node
        # to make sure we have valid children:
        # one generator of alternative subtrees per child
        subtree_gens = [
            self.recursive_enumerate(child, rules_per_node)
            for child in new_node.children
        ]

        # cartesian product over subtrees rooted at each child
        # (itertools.product fully consumes each generator up front);
        # each combination becomes a new tree rooted at `new_node`
        for subtrees in itertools.product(*subtree_gens):
            yield replace_children(new_node, subtrees)
def can_build_rule(edit):
    """Tell whether ``edit`` qualifies as the basis for a removal rule.

    The result is truthy only when, checked in order with short-circuiting:

    * ``is_remove_edit(edit)`` is truthy,
    * ``edit.arg1`` is a parameter node with a parent
      (``parent is not None``),
    * ``edit.arg2`` is ``None``.

    Returns the first falsy operand of the chain, or the result of the final
    ``is None`` check.
    """
    return (
        is_remove_edit(edit)
        and pt.is_param_node(edit.arg1)
        and edit.arg1.parent is not None
        and edit.arg2 is None
    )