def test_graph_from_edges_string():
    """Check the parsing of edges strings and the edges <-> graph round trip."""

    def _check_parsing_and_round_trip(text, expected_nodes, expected_edges):
        # The parsed graph must expose exactly the expected nodes and edges
        parsed = graph_from_edges_string(text)
        assert set(parsed.nodes) == expected_nodes
        assert set(parsed.edges) == expected_edges

        # Extracting the edges and rebuilding a graph from them must not lose anything
        rebuilt = graph_from_edges(*edges_from_graph(parsed))
        assert set(rebuilt.nodes) == set(parsed.nodes)
        assert set(rebuilt.edges) == set(parsed.edges)

    # Letter-named nodes: '-' and '->' separators, with or without spaces,
    # chained or ';'-separated edges, optional trailing ';'
    letter_descriptions = (
        "A - B - C ; D - C",
        "A - B ; B - C ; D - C",
        "A -> B -> C ; D -> C",
        "A-B;B-C;D-C",
        "A->B;B->C;D-C;",
    )
    for text in letter_descriptions:
        _check_parsing_and_round_trip(
            text,
            {"A", "B", "C", "D"},
            {("A", "B"), ("B", "C"), ("D", "C")},
        )

    # Digit-named nodes are expected to come back as integers
    digit_descriptions = ("1 - 2 - 3 ; 4 - 3", "1 - 2 ; 2 - 3 ; 4 - 3")
    for text in digit_descriptions:
        _check_parsing_and_round_trip(
            text,
            {1, 2, 3, 4},
            {(1, 2), (2, 3), (4, 3)},
        )
def test_edges_from_graph():
    """Check edges_from_graph on isolated nodes and on a graph made of two edges."""
    # Graph with two nodes and no edge: each node is expected as a 1-tuple,
    # and the expected result is the same for both insertion orders
    for insertion_order in (("A", "B"), ("B", "A")):
        isolated = nx.DiGraph()
        for name in insertion_order:
            isolated.add_node(name)
        assert edges_from_graph(isolated) == [("A",), ("B",)]

    # Graph built from two disjoint edges
    built = graph_from_edges(("A", "B"), ("C", "D"))
    assert set(built.nodes) == {"A", "B", "C", "D"}
    assert set(built.edges) == {("A", "B"), ("C", "D")}
    assert edges_from_graph(built) == [("A", "B"), ("C", "D")]
def test_edges_from_graph():
    """Check what edges_from_graph returns for edge-less graphs and for a two-edges graph."""

    def _graph_of_isolated_nodes(*names):
        # Directed graph holding only the given nodes (added in that order) and no edge
        graph = nx.DiGraph()
        for name in names:
            graph.add_node(name)
        return graph

    # Without any edge, every node is expected alone in its own tuple;
    # the expected list does not depend on the order the nodes were added
    expected_singletons = [("A",), ("B",)]
    assert edges_from_graph(_graph_of_isolated_nodes("A", "B")) == expected_singletons
    assert edges_from_graph(_graph_of_isolated_nodes("B", "A")) == expected_singletons

    # With two disjoint edges, the edges given at construction are expected back
    expected_edges = [("A", "B"), ("C", "D")]
    graph = graph_from_edges(*expected_edges)
    assert set(graph.nodes) == {"A", "B", "C", "D"}
    assert set(graph.edges) == set(expected_edges)
    assert edges_from_graph(graph) == expected_edges
def _rec_convert_graph_to_code(Graph, all_models_params, models_dico, model_name_mapping=None, composition_already_done=None): """ recursive function used to convert a Graph into a json code See convert_graph_to_code """ if composition_already_done is None: composition_already_done = set() if len(Graph.nodes) == 1: node = list(Graph.nodes)[0] return models_dico[node] node = _find_first_composition_node(Graph, composition_already_done) if node is not None: successors = list(Graph.successors(node)) assert len(successors) > 0 else: successors = [] if node is None or len(successors) == 0: ### ** It's means I'll return a GraphPipeline ** ### # 2 cases : # * nodes is None : meaning there is no composition node if len(successors) > 0: raise ValueError( "a composition node should have at most one successor '%s'" % str(node)) # assert len(successors) > 0 # it shouldn't append ... # 1) either it an original node => composition node => no successor isn't possible # 2) the node was already handled => should have been in the list edges = gh.edges_from_graph(Graph) if model_name_mapping is None: model_name_mapping = _create_name_mapping(list(Graph.nodes)) # each node in graph will be mapped to a name within the GraphPipeline models = {model_name_mapping[n]: models_dico[n] for n in Graph.nodes} edges = [ tuple((model_name_mapping[e] for e in edge)) for edge in edges ] return (SpecialModels.GraphPipeline, { "models": models, "edges": edges }) composition_already_done.add(node) # to prevent looping on the same node all_sub_branch_nodes = {} all_terminal_nodes = [] for successor in successors: sub_branch_nodes = list( gh.subbranch_search(starting_node=successor, Graph=Graph, visited={node})) all_sub_branch_nodes[successor] = sub_branch_nodes assert successor in sub_branch_nodes sub_Graph = Graph.subgraph(sub_branch_nodes) all_terminal_nodes += gh.get_terminal_nodes(sub_Graph) models_dico[successor] = _rec_convert_graph_to_code( sub_Graph, all_models_params=all_models_params, 
models_dico=models_dico, model_name_mapping=model_name_mapping, composition_already_done=composition_already_done, ) # Check all_s = [ frozenset(Graph.successors(t_node)) for t_node in all_terminal_nodes ] if len(set(all_s)) != 1: # By convention, if we look at the nodes AFTER the composition # (ie : the successors of the terminal nodes of the part of the graph that will be merged by the composition) # Those nodes should have the same list of successors. Those successors will be the successors of the merged node raise ValueError( "The successor at the end of the composition node %s are not always the same" % str(node)) if len(successors) == 1: # Only one sucessor of composition node models_dico[node] = (_klass_from_node(node), models_dico[successors[0]], all_models_params[node]) elif len(successors) > 1: models_dico[node] = ( _klass_from_node(node), [models_dico[successor] for successor in successors], all_models_params[node], ) else: raise NotImplementedError("can't go there") # Now I need to merge 'node' with all the sub-branches nodes_mapping = {} for successor, sub_branch_nodes in all_sub_branch_nodes.items(): for n in sub_branch_nodes: nodes_mapping[n] = node Gmerged = gh.merge_nodes(Graph, nodes_mapping=nodes_mapping) # All the node in successor will be 'fused' with 'node' ... # Recurse now, that the composition node is taken care of return _rec_convert_graph_to_code( Gmerged, all_models_params=all_models_params, models_dico=models_dico, model_name_mapping=model_name_mapping, composition_already_done=composition_already_done, )
def _approx_cross_validation_create_sub_graph_pipeline(self, data_dico, X):
    """Create the sub graph-pipeline that still has to be fully cross-validated, and its data.

    The nodes whose entry in 'data_dico' is not None are considered done :
    they are removed and replaced by a BlockSelector feeding their data
    to the remaining nodes.

    Parameters
    ----------
    data_dico : dict
        node -> data already computed for that node, or None if nothing was computed

    X : data
        original data, it is given to the remaining nodes that have no predecessor

    Returns
    -------
    new_graph_pipeline
        GraphPipeline with the remaining nodes and the added BlockSelector(s)
    new_data
        BlockManager with the data of each BlockSelector

    Raises
    ------
    ValueError
        if an edge goes from a node that is not done to a node that is done
    """
    ### Create a new GraphPipeline with only the remaining Nodes ###
    dones_nodes = {name for name, data in data_dico.items() if data is not None}

    new_graph = nx.DiGraph()
    new_models = {}
    new_datas = {}
    block_selector_nodes = set()

    for n1, n2 in self.complete_graph.edges:
        source_done = n1 in dones_nodes
        target_done = n2 in dones_nodes

        if source_done and target_done:
            # both ends are done : nothing to keep
            continue

        if source_done:
            # done -> remaining : a BlockSelector replaces the done node
            selector_name = "_data_%s" % n1
            new_graph.add_edge(selector_name, n2)
            new_models[n2] = self._models[n2]
            new_models[selector_name] = BlockSelector(selector_name)
            new_datas[selector_name] = data_dico[n1]
            block_selector_nodes.add(selector_name)
        elif not target_done:
            # remaining -> remaining : the edge is kept as it is
            new_graph.add_edge(n1, n2)
            new_models[n1] = self._models[n1]
            new_models[n2] = self._models[n2]
        else:
            raise ValueError("Should never go there")

    # Remaining nodes without predecessor are plugged on the original data.
    # Iterate on a copy of the nodes because the graph is modified in the loop
    for candidate in list(new_graph.nodes):
        if candidate in block_selector_nodes:
            continue
        if not list(new_graph.predecessors(candidate)):
            new_graph.add_edge("_data_", candidate)
            new_models["_data_"] = BlockSelector("_data_")
            new_datas["_data_"] = X

    new_data_dtm = BlockManager(new_datas)
    new_graph_pipeline = GraphPipeline(models=new_models, edges=edges_from_graph(new_graph))

    return new_graph_pipeline, new_data_dtm
def _rec_convert_graph_to_code_OLD2(Graph, all_models_params, models_dico, model_name_mapping=None): """ recursive function used to convert a Graph into a json code See convert_graph_to_code """ ### ** only one node in Graph : I'll return what was saved in models_dico ** ### if len(Graph.nodes) == 1: node = list(Graph.nodes)[0] return models_dico[node] node = _find_first_composition_node(Graph) if node is not None: predecessors = gh.get_all_predecessors(Graph, node) successors = gh.get_all_successors(Graph, node) if not gh.is_it_a_partition(list(Graph.nodes), [predecessors, [node], successors]): raise ValueError("Incorrect graph, wrong split around node %s" % str(node)) else: predecessors = [] successors = [] if node is None or len(successors) == 0: ### ** It's means I'll return a GraphPipeline ** ### edges = gh.edges_from_graph(Graph) if model_name_mapping is None: model_name_mapping = _create_name_mapping(list(Graph.nodes)) # each node in graph will be mapped to a name within the GraphPipeline models = {model_name_mapping[n]: models_dico[n] for n in Graph.nodes} edges = [ tuple((model_name_mapping[e] for e in edge)) for edge in edges ] return (SpecialModels.GraphPipeline, { "models": models, "edges": edges }) Graph_bellow = Graph.subgraph(successors) connected_Gbellow = gh.get_connected_graphs(Graph_bellow) if len(predecessors) == 0 and len(connected_Gbellow) > 1: return ( _klass_from_node(node), [ _rec_convert_graph_to_code_OLD2(Gb, all_models_params, models_dico, model_name_mapping) for Gb in connected_Gbellow ], all_models_params[node], ) elif len(predecessors) == 0 and len(connected_Gbellow) == 1: return ( _klass_from_node(node), _rec_convert_graph_to_code_OLD2(Graph_bellow, all_models_params, models_dico, model_name_mapping), all_models_params[node], ) else: G_bellow_and_node = Graph.subgraph([node] + successors) G_above = Graph.subgraph(predecessors + [node]) models_dico[node] = _rec_convert_graph_to_code_OLD2( G_bellow_and_node, all_models_params, 
models_dico, model_name_mapping) return _rec_convert_graph_to_code(G_above, all_models_params, models_dico, model_name_mapping)
def _rec_convert_graph_to_code_OLD(G, all_params): """ recursive function to convert a graph into a json representation """ if len(G.nodes) == 0: return {} ### 1) Find First composition node has_composition = False for node in gh.iter_graph(G): if StepCategories.is_composition_step(node[0]): has_composition = True break return_gpipe = not has_composition if has_composition: ### If there is a composition node, I need to split between what is above and what is bellow predecessors = gh.get_all_predecessors(G, node) successors = gh.get_all_successors(G, node) if not gh.is_it_a_partition(list(G.nodes), [predecessors, [node], successors]): raise ValueError("Incorrect graph, wrong split around node %s" % str(node)) if len(successors) == 0: # If nothing bellow, I'll be able to return something return_gpipe = True if return_gpipe: if len(G.nodes) > 1: ### I'll create a GraphPipeline object edges = gh.edges_from_graph(G) model_name_mapping = _create_name_mapping(list(G.nodes)) # each node in graph will be mapped to a name within the GraphPipeline models = {model_name_mapping[n]: all_params[n] for n in G.nodes} edges = [ tuple((model_name_mapping[e] for e in edge)) for edge in edges ] return (SpecialModels.GraphPipeline, { "models": models, "edges": edges }) else: ### Otherwise it is just the model_name with its parameters return node[1][1], all_params[list(G.nodes)[0]] G_above = G.subgraph(predecessors + [node]) G_bellow = G.subgraph(successors) connected_Gbellow = gh.get_connected_graphs(G_bellow) if len(connected_Gbellow) == 1: # what is bellow is a 'connected graph' : it means that the composition need should be applied to One model all_params[node] = _rec_convert_graph_to_code_OLD(G_bellow, all_params) else: # otherwise, the composition will be applied to a list of models all_params[node] = [ _rec_convert_graph_to_code_OLD(g, all_params) for g in connected_Gbellow ] return _rec_convert_graph_to_code_OLD(G_above, all_params)