def test_all_graphs_functions():
    """Exercise the graph helper functions on a small two-branch DAG.

    The graph is built with ``add_node_after`` and the assertions below pin
    its shape: two starting nodes (1 and 4), one terminal node (6), with the
    chains 1 -> 2 -> 3 and 4 -> 5 both ending on 6.
    """
    # Each entry is (new_node, *nodes_it_is_added_after), in insertion order.
    insertions = [(1,), (2, 1), (3, 2), (4,), (5, 4), (6, 5, 3)]

    G = nx.DiGraph()
    for insertion in insertions:
        G = add_node_after(G, *insertion)

    assert set(get_terminal_nodes(G)) == {6}
    assert set(get_starting_nodes(G)) == {1, 4}

    expected_successors = {
        1: {2, 3, 6},
        2: {3, 6},
        3: {6},
        4: {5, 6},
        5: {6},
        6: set(),
    }
    for node, expected in expected_successors.items():
        assert set(get_all_successors(G, node)) == expected

    expected_predecessors = {
        1: set(),
        2: {1},
        3: {1, 2},
        4: set(),
        5: {4},
        6: {1, 2, 3, 4, 5},
    }
    for node, expected in expected_predecessors.items():
        assert set(get_all_predecessors(G, node)) == expected
def assert_model_graph_structure(G):
    """Validate the structure of a model graph.

    The checks are run in this order and the first failure raises:

    1. the graph has exactly one terminal node;
    2. the graph is connected;
    3. the graph has no cycle;
    4. every composition node (``is_composition_model``) has at least one
       direct successor, and each of those successors has the composition
       node as its only direct predecessor.

    Raises
    ------
    ValueError
        If any of the checks above fails.
    """
    terminal_nodes = gh.get_terminal_nodes(G)
    if len(terminal_nodes) != 1:
        raise ValueError("I should have only one terminal node")

    if not gh.is_connected(G):
        raise ValueError("the graph should be connected")

    if gh.has_cycle(G):
        raise ValueError("The graph shouldn't have any cycle")

    for node in G.nodes:
        if not is_composition_model(node):
            continue

        children = list(G.successors(node))
        if not children:
            raise ValueError("Composition node %s has no successor" % str(node))

        for child in children:
            parents = list(G.predecessors(child))
            # A child of a composition node must hang from that node only.
            if parents != [node]:
                raise ValueError(
                    "The node %s has more than one parent, which is impossible for a child of a composition node (%s)"
                    % (str(child), str(node))
                )
def assert_model_graph_structure(G):
    """Validate the structure of a model graph.

    The checks are run in this order and the first failure raises:

    1. the graph has exactly one terminal node (``gh.get_terminal_nodes``);
    2. the graph is connected (``gh.is_connected``);
    3. the graph has no cycle (``gh.has_cycle``);
    4. every composition node has at least one direct successor;
    5. for every composition node, its successors
       (``gh.get_all_successors``), the node itself and its predecessors
       (``gh.get_all_predecessors``) form a partition of the graph nodes
       (``gh.is_it_a_partition``).

    A node is treated as a composition node when
    ``StepCategories.is_composition_step(node[0])`` is true.

    Raises
    ------
    ValueError
        If any of the checks above fails.
    """
    if len(gh.get_terminal_nodes(G)) != 1:
        raise ValueError("I should have only one terminal node")

    if not gh.is_connected(G):
        raise ValueError("the graph should be connected")

    if gh.has_cycle(G):
        raise ValueError("The graph shouldn't have any cycle")

    # First pass: "no successor" errors take priority over partition errors,
    # so the two loops are deliberately kept separate.
    for node in G.nodes:
        if not StepCategories.is_composition_step(node[0]):
            continue
        if len(list(G.successors(node))) == 0:
            # str(node): nodes are indexable (node[0] above); formatting a
            # multi-element tuple directly with a single %s raises TypeError.
            raise ValueError("Composition node %s has no successor" % str(node))

    # Second pass: the composition node must split the graph cleanly.
    for node in G.nodes:
        if not StepCategories.is_composition_step(node[0]):
            continue
        successors = gh.get_all_successors(G, node)
        predecessors = gh.get_all_predecessors(G, node)
        if not gh.is_it_a_partition(list(G.nodes), [successors, [node], predecessors]):
            raise ValueError("Incorrect split around composition node %s" % str(node))
def _verif_graph_structure(self):
    """Check the structure of ``self.complete_graph`` and of its models.

    Raises
    ------
    ValueError
        If the graph does not have exactly one terminal node, is not
        connected (as an undirected graph), or contains a cycle.
    TypeError
        If the model of a terminal node has no ``fit`` attribute, or the
        model of any other node lacks one of ``fit``, ``fit_transform``
        or ``transform``.
    """
    graph = self.complete_graph

    # Exactly one terminal node
    terminal_nodes = get_terminal_nodes(graph)
    nb_terminal_nodes = len(terminal_nodes)
    if nb_terminal_nodes != 1:
        raise ValueError("the graph should have only one terminal node, instead i got %d" % nb_terminal_nodes)

    # Connected
    if not nx.is_connected(graph.to_undirected()):
        raise ValueError("the graph should be connected")

    # Acyclic: find_cycle signals "no cycle" through NetworkXNoCycle,
    # so reaching the 'else' clause means a cycle was found.
    try:
        nx.find_cycle(graph)
    except nx.NetworkXNoCycle:
        pass
    else:
        raise ValueError("The graph shouldn't have any cycle")

    # Each registered model must expose the methods its position requires
    intermediary_api = ("fit", "fit_transform", "transform")
    for name, model in self._models.items():
        if name in terminal_nodes:
            if not hasattr(model, "fit"):
                raise TypeError("The terminal step (%s) should have a fit method" % name)
        elif not all(hasattr(model, method) for method in intermediary_api):
            raise TypeError(
                "Intermediary step (%s) should have a 'fit','fit_transform' and 'transform' method" % name
            )
def _rec_convert_graph_to_code(Graph, all_models_params, models_dico, model_name_mapping=None, composition_already_done=None): """ recursive function used to convert a Graph into a json code See convert_graph_to_code """ if composition_already_done is None: composition_already_done = set() if len(Graph.nodes) == 1: node = list(Graph.nodes)[0] return models_dico[node] node = _find_first_composition_node(Graph, composition_already_done) if node is not None: successors = list(Graph.successors(node)) assert len(successors) > 0 else: successors = [] if node is None or len(successors) == 0: ### ** It's means I'll return a GraphPipeline ** ### # 2 cases : # * nodes is None : meaning there is no composition node if len(successors) > 0: raise ValueError( "a composition node should have at most one successor '%s'" % str(node)) # assert len(successors) > 0 # it shouldn't append ... # 1) either it an original node => composition node => no successor isn't possible # 2) the node was already handled => should have been in the list edges = gh.edges_from_graph(Graph) if model_name_mapping is None: model_name_mapping = _create_name_mapping(list(Graph.nodes)) # each node in graph will be mapped to a name within the GraphPipeline models = {model_name_mapping[n]: models_dico[n] for n in Graph.nodes} edges = [ tuple((model_name_mapping[e] for e in edge)) for edge in edges ] return (SpecialModels.GraphPipeline, { "models": models, "edges": edges }) composition_already_done.add(node) # to prevent looping on the same node all_sub_branch_nodes = {} all_terminal_nodes = [] for successor in successors: sub_branch_nodes = list( gh.subbranch_search(starting_node=successor, Graph=Graph, visited={node})) all_sub_branch_nodes[successor] = sub_branch_nodes assert successor in sub_branch_nodes sub_Graph = Graph.subgraph(sub_branch_nodes) all_terminal_nodes += gh.get_terminal_nodes(sub_Graph) models_dico[successor] = _rec_convert_graph_to_code( sub_Graph, all_models_params=all_models_params, 
models_dico=models_dico, model_name_mapping=model_name_mapping, composition_already_done=composition_already_done, ) # Check all_s = [ frozenset(Graph.successors(t_node)) for t_node in all_terminal_nodes ] if len(set(all_s)) != 1: # By convention, if we look at the nodes AFTER the composition # (ie : the successors of the terminal nodes of the part of the graph that will be merged by the composition) # Those nodes should have the same list of successors. Those successors will be the successors of the merged node raise ValueError( "The successor at the end of the composition node %s are not always the same" % str(node)) if len(successors) == 1: # Only one sucessor of composition node models_dico[node] = (_klass_from_node(node), models_dico[successors[0]], all_models_params[node]) elif len(successors) > 1: models_dico[node] = ( _klass_from_node(node), [models_dico[successor] for successor in successors], all_models_params[node], ) else: raise NotImplementedError("can't go there") # Now I need to merge 'node' with all the sub-branches nodes_mapping = {} for successor, sub_branch_nodes in all_sub_branch_nodes.items(): for n in sub_branch_nodes: nodes_mapping[n] = node Gmerged = gh.merge_nodes(Graph, nodes_mapping=nodes_mapping) # All the node in successor will be 'fused' with 'node' ... # Recurse now, that the composition node is taken care of return _rec_convert_graph_to_code( Gmerged, all_models_params=all_models_params, models_dico=models_dico, model_name_mapping=model_name_mapping, composition_already_done=composition_already_done, )
def create_graphical_representation(steps):
    """Build the graph of a model from an OrderedDict of steps.

    Parameters
    ----------
    steps : OrderedDict
        Keys are ``(step_name, model_name)`` tuples, values are the
        variable type(s) handled by the step.

    Returns
    -------
    G : nx.DiGraph
        Graph whose nodes are the ``(step_name, model_name)`` keys, plus any
        selector node created on the fly.
    new_steps : OrderedDict
        The selector nodes that were added, mapped to the types they select.

    Raises
    ------
    ValueError
        If a node is added twice, if a step is missing from the final graph,
        or if the graph contains a node that is neither in ``steps`` nor in
        ``new_steps``. ``assert_model_graph_structure`` is also called on
        the result.
    """
    # Remark : the step number should probably be stored in the graph, with
    # correct labels, so that several nodes can share a name (Ex : Scaler...)

    ### 1) Split composition steps vs the rest
    composition_steps = []
    regular_steps = []
    for (step_name, model_name), var_type in steps.items():
        if StepCategories.is_composition_step(step_name):
            bucket = composition_steps
        else:
            bucket = regular_steps
        bucket.append((step_name, model_name, var_type))

    ### 2) Create the graph for non-composition steps
    new_steps = OrderedDict()
    G = nx.DiGraph()
    for step_name, model_name, var_type in regular_steps:
        flat_var_type = unnest_tuple(var_type)

        # The new step will be plugged on one (or more) of the terminal nodes
        terminal_nodes = gh.get_terminal_nodes(G)
        type_to_terminal = {unnest_tuple(steps[t_node]): t_node for t_node in terminal_nodes}

        node_name = (step_name, model_name)  # 2-tuple
        if node_name in G.nodes:
            raise ValueError("This node already exists '(%s,%s)'" % node_name)

        # Three possibilities :
        # 1) attach the new node to ONE terminal node
        # 2) create a new branch (new node attached to nothing)
        # 3) attach the new node to SEVERAL terminal nodes

        if flat_var_type in type_to_terminal:
            ### 1) A branch of exactly this type already exists
            G = gh.add_node_after(G, node_name, type_to_terminal[flat_var_type])
            continue

        # No branch of this exact type : look where the node can be plugged
        candidates = [
            (node_type, t_node)
            for node_type, t_node in type_to_terminal.items()
            if tuple_include(node_type, flat_var_type)
        ]

        if not candidates:
            ### 2) No matching node : create a new branch
            G = gh.add_node_after(G, node_name)
            continue

        ### 3) Attach to several nodes
        # A node sometimes has to be added UPSTREAM, for the types that
        # no existing branch covers yet.
        types_added = unnest_tuple([node_type for node_type, _ in candidates])
        types_not_added = diff(flat_var_type, types_added)
        if len(types_not_added) > 0:
            # NOTE(review): this formatting presumably expects unnest_tuple to
            # yield a single element here - confirm for several missing types.
            name_of_cat = "Selector_%s" % unnest_tuple(types_not_added)
            selector_node = (name_of_cat, (name_of_cat, SpecialModels.ColumnsSelector))

            G = gh.add_node_after(G, selector_node)

            # The selector must also be dynamically added to the list of steps
            new_steps[selector_node] = types_not_added
            candidates.append((types_not_added, selector_node))

        parents = [t_node for _, t_node in candidates]
        G = gh.add_node_after(G, node_name, *parents)

    ### 3) Include composition nodes on top
    for step_name, model_name, _ in reversed(composition_steps):
        composition_node = (step_name, model_name)
        starting_nodes = gh.get_starting_nodes(G)
        for start in starting_nodes:
            G.add_edge(composition_node, start)

    ### 4) Verify the graph structure
    # every step is in the graph ...
    for step_name, model_name in steps:
        if (step_name, model_name) not in G:
            raise ValueError("'(%s , %s)' should be in graph" % (step_name, model_name))

    # ... and every node of the graph comes from a step
    for node in G.nodes():
        if node not in steps and node not in new_steps:
            raise ValueError("'(%s,%s)' shouldn't be in graph" % node)

    assert_model_graph_structure(G)

    return G, new_steps
def create_graph(self):
    """Create the graphical structure from ``self._edges``.

    Sets ``self.complete_graph``, validates it with
    ``_verif_graph_structure``, then stores the first terminal node in
    ``self._terminal_node`` and the node order given by ``iter_graph`` in
    ``self._nodes_order``.
    """
    self.complete_graph = graph_from_edges(*self._edges)

    # Validate before deriving anything from the graph
    self._verif_graph_structure()

    terminal_nodes = get_terminal_nodes(self.complete_graph)
    self._terminal_node = terminal_nodes[0]

    ordered_nodes = iter_graph(self.complete_graph)
    self._nodes_order = list(ordered_nodes)
def test_RandomModelGenerator_iterator(type_of_iterator, num_only):
    """Check every model yielded by a RandomModelGenerator iterator.

    For each yielded ``(Graph, all_models_params, block_to_use)`` triple the
    test checks the graph shape, the parameters, the blocks, and that the
    graph converts into a model exposing ``fit``. For the "default" iterator,
    a graph containing the RandomForestClassifier node is also fitted on a
    small sample.
    """
    dfX, y, auto_ml_config = get_automl_config(num_only)
    generator = RandomModelGenerator(auto_ml_config=auto_ml_config, random_state=123)

    if type_of_iterator == "default":
        iterator = generator.iterator_default_models()
    elif type_of_iterator == "block_search":
        iterator = generator.iterate_block_search(random_order=False)
    elif type_of_iterator == "block_search_random":
        iterator = generator.iterate_block_search(random_order=True)

    assert hasattr(iterator, "__iter__")

    random_forest_node = ("Model", ("Model", "RandomForestClassifier"))

    # Verify everything the iterator yields
    for model in iterator:
        assert isinstance(model, tuple)
        assert len(model) == 3
        Graph, all_models_params, block_to_use = model

        terminal_nodes = get_terminal_nodes(Graph)
        assert len(terminal_nodes) == 1
        assert terminal_nodes[0][0] == StepCategories.Model

        assert hasattr(Graph, "edges")
        assert hasattr(Graph, "nodes")

        assert isinstance(all_models_params, dict)
        for node in Graph.nodes:
            assert node in all_models_params

        assert isinstance(block_to_use, (tuple, list))
        for block in block_to_use:
            assert block in TypeOfVariables.alls

        converted = convert_graph_to_code(Graph, all_models_params, also_returns_mapping=True)
        assert isinstance(converted, dict)
        assert "name_mapping" in converted
        assert "json_code" in converted

        sk_model = sklearn_model_from_param(converted["json_code"])
        assert hasattr(sk_model, "fit")

        if type_of_iterator == "default" and random_forest_node in Graph.nodes:
            # In that case the model is actually fitted here, simplified
            # to 2 estimators to keep the test fast.
            all_models_params[random_forest_node]["n_estimators"] = 2
            converted = convert_graph_to_code(Graph, all_models_params, also_returns_mapping=True)
            sk_model = sklearn_model_from_param(converted["json_code"])

            # Needs at least 20 observations to make sure all transformers work
            first_zeros = np.where(y == 0)[0][0:10]
            first_ones = np.where(y == 1)[0][0:10]
            sub_index = np.concatenate((first_zeros, first_ones), axis=0)

            sk_model.fit(dfX.iloc[sub_index, :], y[sub_index])

            yhat = sk_model.predict(dfX.head(2))
            assert yhat.shape == (2,)