def assert_model_graph_structure(G):
    """Validate the structural invariants of a model graph.

    Raises a ValueError when the graph has more than one terminal node,
    is not connected, contains a cycle, or has a composition node that is
    badly placed (no successor, or whose successors / predecessors do not
    partition the remaining nodes).
    """
    # exactly one terminal node
    if len(gh.get_terminal_nodes(G)) != 1:
        raise ValueError("I should have only one terminal node")

    # the graph must be connected
    if not gh.is_connected(G):
        raise ValueError("the graph should be connected")

    # ... and acyclic
    if gh.has_cycle(G):
        raise ValueError("The graph shouldn't have any cycle")

    # NOTE(review): nodes look like tuples whose first element is a step
    # category — confirm against the node-building code.
    composition_nodes = [n for n in G.nodes if StepCategories.is_composition_step(n[0])]

    # every composition node needs at least one successor
    for comp_node in composition_nodes:
        if not list(G.successors(comp_node)):
            raise ValueError("Composition node %s has no successor" % comp_node)

    # successors + node + predecessors must partition the whole node set
    for comp_node in composition_nodes:
        below = gh.get_all_successors(G, comp_node)
        above = gh.get_all_predecessors(G, comp_node)
        if not gh.is_it_a_partition(list(G.nodes), [below, [comp_node], above]):
            raise ValueError("Incorrect split around composition node %s" % comp_node)
def test_all_graphs_functions():
    """Exercise the graph helpers on a small two-branch DAG.

    Topology: 1 -> 2 -> 3 -> 6  and  4 -> 5 -> 6 (two roots, one sink).
    """
    G = nx.DiGraph()
    for new_node, *parents in [(1,), (2, 1), (3, 2), (4,), (5, 4), (6, 5, 3)]:
        G = add_node_after(G, new_node, *parents)

    assert set(get_terminal_nodes(G)) == {6}
    assert set(get_starting_nodes(G)) == {1, 4}

    expected_successors = {
        1: {2, 3, 6},
        2: {3, 6},
        3: {6},
        4: {5, 6},
        5: {6},
        6: set(),
    }
    for node, expected in expected_successors.items():
        assert set(get_all_successors(G, node)) == expected

    expected_predecessors = {
        1: set(),
        2: {1},
        3: {1, 2},
        4: set(),
        5: {4},
        6: {1, 2, 3, 4, 5},
    }
    for node, expected in expected_predecessors.items():
        assert set(get_all_predecessors(G, node)) == expected
def get_subpipeline(self, end_node, deepcopy_models=False):
    """ create a New model that corresponds to the original GraphPipeline but with a new ending node

    If the original GraphPipeline was fitted, the new model will also be fitted

    Parameters
    ----------
    end_node : str
        the name of the node at which the new pipeline will stop. Must be in the Graph

    deepcopy_models : boolean, default=False
        if True will make a deepcopy of the models.

    Returns
    -------
    new GraphPipeline instance
    """
    # make sure the complete graph / internal caches exist before we read them
    self._complete_init()

    if end_node not in self.complete_graph:
        raise ValueError("the node '%s' isn't in the original graph" % end_node)

    # get all predecessors of the end node => those are the nodes to include in the sub-graph
    predecessors = get_all_predecessors(self.complete_graph, end_node)
    nodes_to_keep = list(predecessors) + [end_node]
    # Remark : we could separate this into a submethod to create a subpipeline from a list of nodes

    # Degenerate case: the end node has no predecessor, so the "sub-pipeline"
    # is just that single model — return it directly, not a GraphPipeline.
    if len(nodes_to_keep) == 1:
        assert end_node == nodes_to_keep[0]
        return self._models[end_node]

    # filter edges: keep only the edges whose both ends survive
    edges_to_keep = []
    for e1, e2 in get_two_by_two_edges(*self._edges):
        if e1 in nodes_to_keep and e2 in nodes_to_keep:
            edges_to_keep.append((e1, e2))
    # I do that instead of :
    # complete_graph_sub.subgraph(nodes_to_keep).edges
    # because that way I preserve the order of the edges, which handles the concatenation order

    # Retrieve sklearn models (optionally deep-copied so the sub-pipeline
    # does not share fitted state with the original)
    if deepcopy_models:
        models = {node: deepcopy(self._models[node]) for node in nodes_to_keep}
    else:
        models = {node: self._models[node] for node in nodes_to_keep}

    # Change 'no_concat_nodes' : restrict to kept nodes, preserving the
    # original container type; collapse to None when nothing remains
    if self.no_concat_nodes is None:
        no_concat_nodes = None
    else:
        no_concat_nodes = [n for n in self.no_concat_nodes if n in nodes_to_keep]
        no_concat_nodes = type(self.no_concat_nodes)(no_concat_nodes)
        if len(no_concat_nodes) == 0:
            no_concat_nodes = None

    ###############################
    ###   Create new pipeline   ###
    ###############################
    sub_pipeline = GraphPipeline(models=models,
                                 edges=edges_to_keep,
                                 verbose=self.verbose,
                                 no_concat_nodes=no_concat_nodes)

    # Internal modification to change the state : replay the original
    # pipeline's lifecycle flags on the new instance
    if self._preparation_done:
        sub_pipeline._complete_init()

    if not self._already_fitted:
        return sub_pipeline

    # here the pipeline was fitted : copy the fitted state, restricted to kept nodes
    sub_pipeline._already_fitted = True
    sub_pipeline._Xinput_features = deepcopy(self._Xinput_features)  # copy just to be safe
    sub_pipeline._all_concat_order = dico_key_filter(self._all_concat_order, lambda n: n in nodes_to_keep)
    sub_pipeline._all_concat_type = dico_key_filter(self._all_concat_type, lambda n: n in nodes_to_keep)

    return sub_pipeline
def _rec_convert_graph_to_code_OLD2(Graph, all_models_params, models_dico, model_name_mapping=None):
    """ recursive function used to convert a Graph into a json code

    Splits the graph around its first composition node: what is below the
    composition node is converted first and stored in ``models_dico``, then
    the recursion continues on what is above. A graph without composition
    node (or with nothing below it) is emitted as a GraphPipeline tuple.

    See convert_graph_to_code
    """
    ### ** only one node in Graph : I'll return what was saved in models_dico ** ###
    if len(Graph.nodes) == 1:
        node = list(Graph.nodes)[0]
        return models_dico[node]

    node = _find_first_composition_node(Graph)

    if node is not None:
        predecessors = gh.get_all_predecessors(Graph, node)
        successors = gh.get_all_successors(Graph, node)
        # sanity check: the composition node must split the graph cleanly
        if not gh.is_it_a_partition(list(Graph.nodes), [predecessors, [node], successors]):
            raise ValueError("Incorrect graph, wrong split around node %s" % str(node))
    else:
        predecessors = []
        successors = []

    if node is None or len(successors) == 0:
        ### ** It's means I'll return a GraphPipeline ** ###
        edges = gh.edges_from_graph(Graph)

        if model_name_mapping is None:
            model_name_mapping = _create_name_mapping(list(Graph.nodes))
        # each node in graph will be mapped to a name within the GraphPipeline

        models = {model_name_mapping[n]: models_dico[n] for n in Graph.nodes}
        edges = [tuple((model_name_mapping[e] for e in edge)) for edge in edges]

        return (SpecialModels.GraphPipeline, {"models": models, "edges": edges})

    Graph_bellow = Graph.subgraph(successors)
    connected_Gbellow = gh.get_connected_graphs(Graph_bellow)

    if len(predecessors) == 0 and len(connected_Gbellow) > 1:
        # composition applied to a list of independent sub-graphs
        return (
            _klass_from_node(node),
            [
                _rec_convert_graph_to_code_OLD2(Gb, all_models_params, models_dico, model_name_mapping)
                for Gb in connected_Gbellow
            ],
            all_models_params[node],
        )

    elif len(predecessors) == 0 and len(connected_Gbellow) == 1:
        # composition applied to one connected sub-graph
        return (
            _klass_from_node(node),
            _rec_convert_graph_to_code_OLD2(Graph_bellow, all_models_params, models_dico, model_name_mapping),
            all_models_params[node],
        )

    else:
        # convert what is below the composition node, store the result on the
        # node itself, then recurse on what is above
        G_bellow_and_node = Graph.subgraph([node] + successors)
        G_above = Graph.subgraph(predecessors + [node])

        models_dico[node] = _rec_convert_graph_to_code_OLD2(
            G_bellow_and_node, all_models_params, models_dico, model_name_mapping)

        # BUG FIX: recurse into this same _OLD2 implementation. The original
        # called _rec_convert_graph_to_code here, mixing the old and new
        # implementations mid-recursion — the sibling _OLD variant recurses
        # into itself, which is the intended, consistent behavior.
        return _rec_convert_graph_to_code_OLD2(
            G_above, all_models_params, models_dico, model_name_mapping)
def _rec_convert_graph_to_code_OLD(G, all_params):
    """ recursive function to convert a graph into a json representation

    Splits G around its first composition node (what is below is converted
    first and stored in ``all_params``, then the recursion continues on what
    is above); a graph without composition node is emitted directly as a
    GraphPipeline tuple, or as ``(name, params)`` for a single node.
    """
    if len(G.nodes) == 0:
        return {}

    ### 1) Find First composition node
    has_composition = False
    for node in gh.iter_graph(G):
        if StepCategories.is_composition_step(node[0]):
            has_composition = True
            break
    # NOTE(review): the code below deliberately relies on `node` leaking out
    # of the loop above — when has_composition is True it is the first
    # composition node; otherwise it is the last node iterated.

    return_gpipe = not has_composition

    if has_composition:
        ### If there is a composition node, I need to split between what is above and what is bellow
        predecessors = gh.get_all_predecessors(G, node)
        successors = gh.get_all_successors(G, node)

        if not gh.is_it_a_partition(list(G.nodes), [predecessors, [node], successors]):
            raise ValueError("Incorrect graph, wrong split around node %s" % str(node))

        if len(successors) == 0:
            # If nothing bellow, I'll be able to return something
            return_gpipe = True

    if return_gpipe:

        if len(G.nodes) > 1:
            ### I'll create a GraphPipeline object
            edges = gh.edges_from_graph(G)

            model_name_mapping = _create_name_mapping(list(G.nodes))
            # each node in graph will be mapped to a name within the GraphPipeline

            models = {model_name_mapping[n]: all_params[n] for n in G.nodes}

            edges = [tuple((model_name_mapping[e] for e in edge)) for edge in edges]

            return (SpecialModels.GraphPipeline, {"models": models, "edges": edges})

        else:
            ### Otherwise it is just the model_name with its parameters
            # presumably node == the single remaining node here and node[1][1]
            # is its model name — TODO confirm against the node tuple layout
            return node[1][1], all_params[list(G.nodes)[0]]

    # has_composition path with a non-empty "below" part:
    # `predecessors` / `successors` are only bound on that path
    G_above = G.subgraph(predecessors + [node])
    G_bellow = G.subgraph(successors)

    connected_Gbellow = gh.get_connected_graphs(G_bellow)
    if len(connected_Gbellow) == 1:
        # what is bellow is a 'connected graph' : it means that the composition need should be applied to One model
        all_params[node] = _rec_convert_graph_to_code_OLD(G_bellow, all_params)
    else:
        # otherwise, the composition will be applied to a list of models
        all_params[node] = [_rec_convert_graph_to_code_OLD(g, all_params) for g in connected_Gbellow]

    return _rec_convert_graph_to_code_OLD(G_above, all_params)