예제 #1
0
def test_graph_from_edges_string():
    """Check edge-string parsing and the edges_from_graph / graph_from_edges round trip.

    Several spellings of the same graph (with '-' or '->' separators, with or
    without spaces, with or without a trailing ';') must give the same nodes
    and edges. Node names made of digits are expected to come back as ints.
    """

    def _check(edges_string, expected_nodes, expected_edges):
        # Parse the string and compare against the expected graph content
        G = graph_from_edges_string(edges_string)
        assert set(G.nodes) == expected_nodes
        assert set(G.edges) == expected_edges

        # Round trip: graph -> edges -> graph must not lose anything
        G2 = graph_from_edges(*edges_from_graph(G))
        assert set(G2.nodes) == set(G.nodes)
        assert set(G2.edges) == set(G.edges)

    letter_nodes = {"A", "B", "C", "D"}
    letter_edges = {("A", "B"), ("B", "C"), ("D", "C")}
    letter_strings = (
        "A -  B - C  ; D - C",
        "A - B ; B - C ; D - C",
        "A -> B -> C ; D -> C",
        "A-B;B-C;D-C",
        "A->B;B->C;D-C;",
    )
    for edges_string in letter_strings:
        _check(edges_string, letter_nodes, letter_edges)

    number_nodes = {1, 2, 3, 4}
    number_edges = {(1, 2), (2, 3), (4, 3)}
    number_strings = ("1 - 2 - 3 ; 4 - 3", "1 - 2 ; 2 - 3 ; 4 - 3")
    for edges_string in number_strings:
        _check(edges_string, number_nodes, number_edges)
예제 #2
0
def test_edges_from_graph():
    """Check edges_from_graph on isolated nodes and on a two-edge graph."""
    # Isolated nodes are expected as 1-tuples, in the same order whatever the
    # order in which the nodes were inserted.
    for insertion_order in (("A", "B"), ("B", "A")):
        G = nx.DiGraph()
        for node_name in insertion_order:
            G.add_node(node_name)

        assert edges_from_graph(G) == [("A", ), ("B", )]

    # Two disjoint edges: the graph content and the extracted edges must match
    expected_edges = [("A", "B"), ("C", "D")]
    G = graph_from_edges(*expected_edges)

    assert set(G.nodes) == {"A", "B", "C", "D"}
    assert set(G.edges) == set(expected_edges)
    assert edges_from_graph(G) == expected_edges
예제 #3
0
def test_edges_from_graph():
    """Check what edges_from_graph returns for edge-less and two-edge graphs."""

    def _graph_with_nodes(*names):
        # Build a directed graph holding only the given nodes, added in order
        graph = nx.DiGraph()
        for name in names:
            graph.add_node(name)
        return graph

    only_nodes = [("A", ), ("B", )]

    # Nodes without any edge come back as 1-tuples; the insertion order of
    # the nodes is not expected to change the result.
    assert edges_from_graph(_graph_with_nodes("A", "B")) == only_nodes
    assert edges_from_graph(_graph_with_nodes("B", "A")) == only_nodes

    # Graph made of two separate edges
    G = graph_from_edges(("A", "B"), ("C", "D"))
    nodes_found = set(G.nodes)
    edges_found = set(G.edges)
    assert nodes_found == {"A", "B", "C", "D"}
    assert edges_found == {("A", "B"), ("C", "D")}

    assert edges_from_graph(G) == [("A", "B"), ("C", "D")]
예제 #4
0
def _rec_convert_graph_to_code(Graph,
                               all_models_params,
                               models_dico,
                               model_name_mapping=None,
                               composition_already_done=None):
    """Recursive function used to convert a Graph into a json code.

    See convert_graph_to_code.

    At each call the first composition node not yet handled is looked up.
    Each branch below it is converted recursively, the composition entry is
    stored in ``models_dico``, the branches are merged into the composition
    node and the function recurses on the merged graph. When no composition
    node is left, the remaining graph is returned as a GraphPipeline.

    Parameters
    ----------
    Graph : directed graph
        Graph to convert; only ``nodes``, ``successors`` and ``subgraph`` are
        used here (networkx-style API).
    all_models_params : mapping
        Indexed by node; read for the composition nodes.
    models_dico : dict
        Maps each node to its code. Updated in place: entries are written for
        the successors of a composition node and for the node itself.
    model_name_mapping : mapping or None
        Maps each node to its name within the GraphPipeline. When None, it is
        built with ``_create_name_mapping`` from the nodes of the graph that
        is turned into a GraphPipeline.
    composition_already_done : set or None
        Composition nodes already handled; updated in place. A new set is
        created when None.

    Returns
    -------
    The entry of ``models_dico`` when the graph has a single node, otherwise
    a tuple ``(SpecialModels.GraphPipeline, {"models": ..., "edges": ...})``.

    Raises
    ------
    ValueError
        If the terminal nodes of the branches below a composition node do not
        all have the same set of successors.
    """

    if composition_already_done is None:
        composition_already_done = set()

    # Only one node left: its code was already saved in models_dico
    if len(Graph.nodes) == 1:
        node = list(Graph.nodes)[0]
        return models_dico[node]

    node = _find_first_composition_node(Graph, composition_already_done)

    if node is None:
        successors = []
    else:
        successors = list(Graph.successors(node))
        # Internal invariant: a composition node still to handle has something below it
        assert len(successors) > 0

    if not successors:
        # No composition node left to handle: the whole graph becomes a GraphPipeline
        edges = gh.edges_from_graph(Graph)

        if model_name_mapping is None:
            model_name_mapping = _create_name_mapping(list(Graph.nodes))
        # each node in graph will be mapped to a name within the GraphPipeline

        models = {model_name_mapping[n]: models_dico[n] for n in Graph.nodes}

        edges = [
            tuple((model_name_mapping[e] for e in edge)) for edge in edges
        ]

        return (SpecialModels.GraphPipeline, {
            "models": models,
            "edges": edges
        })

    composition_already_done.add(node)  # to prevent looping on the same node

    # Convert each branch starting at a successor of the composition node
    all_sub_branch_nodes = {}
    all_terminal_nodes = []
    for successor in successors:

        sub_branch_nodes = list(
            gh.subbranch_search(starting_node=successor,
                                Graph=Graph,
                                visited={node}))

        all_sub_branch_nodes[successor] = sub_branch_nodes

        assert successor in sub_branch_nodes

        sub_Graph = Graph.subgraph(sub_branch_nodes)

        all_terminal_nodes += gh.get_terminal_nodes(sub_Graph)

        # The code of the whole branch is stored under its starting node
        models_dico[successor] = _rec_convert_graph_to_code(
            sub_Graph,
            all_models_params=all_models_params,
            models_dico=models_dico,
            model_name_mapping=model_name_mapping,
            composition_already_done=composition_already_done,
        )

    # Check
    # By convention, if we look at the nodes AFTER the composition
    # (ie : the successors of the terminal nodes of the part of the graph that will be merged by the composition)
    # those nodes should have the same set of successors. Those successors will be the successors of the merged node
    distinct_successor_sets = {
        frozenset(Graph.successors(t_node)) for t_node in all_terminal_nodes
    }
    if len(distinct_successor_sets) != 1:
        raise ValueError(
            "The successor at the end of the composition node %s are not always the same"
            % str(node))

    # 'successors' is never empty here (the empty case returned above)
    if len(successors) == 1:
        # Only one successor: the composition is applied to a single model
        models_dico[node] = (_klass_from_node(node),
                             models_dico[successors[0]],
                             all_models_params[node])

    else:
        # Several successors: the composition is applied to a list of models
        models_dico[node] = (
            _klass_from_node(node),
            [models_dico[successor] for successor in successors],
            all_models_params[node],
        )

    # Now I need to merge 'node' with all the sub-branches
    nodes_mapping = {
        n: node
        for sub_branch_nodes in all_sub_branch_nodes.values()
        for n in sub_branch_nodes
    }

    Gmerged = gh.merge_nodes(Graph, nodes_mapping=nodes_mapping)
    # All the nodes of the sub-branches will be 'fused' with 'node' ...
    # Recurse now, that the composition node is taken care of

    return _rec_convert_graph_to_code(
        Gmerged,
        all_models_params=all_models_params,
        models_dico=models_dico,
        model_name_mapping=model_name_mapping,
        composition_already_done=composition_already_done,
    )
예제 #5
0
    def _approx_cross_validation_create_sub_graph_pipeline(self, data_dico, X):
        """Create the GraphPipeline that still has to be fully cross-validated.

        A node is considered done when its entry in ``data_dico`` is not None.
        The edges of ``self.complete_graph`` are copied, except that:

        * an edge between two done nodes is dropped;
        * an edge from a done node to a remaining node starts from a
          ``BlockSelector`` node named ``"_data_<node>"``, fed with the data
          stored in ``data_dico`` for the done node;
        * a remaining node without predecessor is plugged on a ``"_data_"``
          ``BlockSelector`` node, fed with ``X``.

        Returns
        -------
        new_graph_pipeline
            GraphPipeline built on the remaining nodes and the selectors.

        new_data_dtm
            BlockManager holding the data read by the selectors.

        Raises
        ------
        ValueError
            If an edge goes from a node that is not done to a done node.
        """
        dones_nodes = {
            name
            for name, data in data_dico.items() if data is not None
        }

        newG = nx.DiGraph()
        new_models = {}
        new_datas = {}
        block_selector_nodes = set()

        for n1, n2 in self.complete_graph.edges:
            source_done = n1 in dones_nodes
            target_done = n2 in dones_nodes

            if source_done and target_done:
                # Nothing left to compute on that edge
                continue

            if target_done:
                # Target done but not its source
                raise ValueError("Should never go there")

            if source_done:
                # Replace the done node by a BlockSelector reading its data
                selector_name = "_data_%s" % n1

                newG.add_edge(selector_name, n2)

                new_models[n2] = self._models[n2]
                new_models[selector_name] = BlockSelector(selector_name)

                new_datas[selector_name] = data_dico[n1]

                block_selector_nodes.add(selector_name)

            else:
                # Both ends remain: keep the edge as it is
                newG.add_edge(n1, n2)

                new_models[n1] = self._models[n1]
                new_models[n2] = self._models[n2]

        # Iterate on a copy of the nodes because the graph is modified in the loop
        for n in list(newG.nodes):
            has_no_predecessor = len(list(newG.predecessors(n))) == 0
            if has_no_predecessor and n not in block_selector_nodes:
                # Entry node of the remaining graph: it receives X
                newG.add_edge("_data_", n)
                new_models["_data_"] = BlockSelector("_data_")

                new_datas["_data_"] = X

        new_graph_pipeline = GraphPipeline(models=new_models,
                                           edges=edges_from_graph(newG))
        new_data_dtm = BlockManager(new_datas)

        return new_graph_pipeline, new_data_dtm
예제 #6
0
def _rec_convert_graph_to_code_OLD2(Graph,
                                    all_models_params,
                                    models_dico,
                                    model_name_mapping=None):
    """Recursive function used to convert a Graph into a json code (older variant).

    See convert_graph_to_code.

    The graph is split around its first composition node into what is above
    it (its predecessors) and what is below it (its successors), and each
    part is converted recursively.

    Parameters
    ----------
    Graph : directed graph
        Graph to convert; ``nodes`` and ``subgraph`` are used here
        (networkx-style API).
    all_models_params : mapping
        Indexed by node; read for the composition node.
    models_dico : dict
        Maps each node to its code. Updated in place for the composition node
        when that node has predecessors.
    model_name_mapping : mapping or None
        Maps each node to its name within the GraphPipeline. When None, it is
        built with ``_create_name_mapping`` from the nodes of the graph that
        is turned into a GraphPipeline.

    Returns
    -------
    One of:

    * the entry of ``models_dico`` when the graph has a single node;
    * ``(SpecialModels.GraphPipeline, {"models": ..., "edges": ...})`` when
      there is no composition node, or nothing below it;
    * ``(_klass_from_node(node), code_below, all_models_params[node])`` when
      the composition node has no predecessor, ``code_below`` being a list
      when the part below is made of several connected graphs;
    * otherwise the result of the conversion of the part above the node.

    Raises
    ------
    ValueError
        If predecessors, the composition node and successors do not form a
        partition of the nodes of the graph.
    """

    ### ** only one node in Graph : I'll return what was saved in models_dico ** ###
    if len(Graph.nodes) == 1:
        node = list(Graph.nodes)[0]
        return models_dico[node]

    node = _find_first_composition_node(Graph)

    if node is not None:
        # Everything above / below the composition node
        predecessors = gh.get_all_predecessors(Graph, node)
        successors = gh.get_all_successors(Graph, node)

        # Every node of the graph must be either above, below, or the node itself
        if not gh.is_it_a_partition(list(Graph.nodes),
                                    [predecessors, [node], successors]):
            raise ValueError("Incorrect graph, wrong split around node %s" %
                             str(node))
    else:
        predecessors = []
        successors = []

    if node is None or len(successors) == 0:
        ### ** It means I'll return a GraphPipeline ** ###
        edges = gh.edges_from_graph(Graph)

        if model_name_mapping is None:
            model_name_mapping = _create_name_mapping(list(Graph.nodes))
        # each node in graph will be mapped to a name within the GraphPipeline

        models = {model_name_mapping[n]: models_dico[n] for n in Graph.nodes}

        # Rename the nodes of each edge with their name in the GraphPipeline
        edges = [
            tuple((model_name_mapping[e] for e in edge)) for edge in edges
        ]

        return (SpecialModels.GraphPipeline, {
            "models": models,
            "edges": edges
        })

    Graph_bellow = Graph.subgraph(successors)

    connected_Gbellow = gh.get_connected_graphs(Graph_bellow)

    if len(predecessors) == 0 and len(connected_Gbellow) > 1:
        # Composition node at the top, several separate graphs below:
        # the composition is given a list of codes, one per graph

        return (
            _klass_from_node(node),
            [
                _rec_convert_graph_to_code_OLD2(Gb, all_models_params,
                                                models_dico,
                                                model_name_mapping)
                for Gb in connected_Gbellow
            ],
            all_models_params[node],
        )

    elif len(predecessors) == 0 and len(connected_Gbellow) == 1:
        # Composition node at the top, one connected graph below:
        # the composition is given a single code

        return (
            _klass_from_node(node),
            _rec_convert_graph_to_code_OLD2(Graph_bellow, all_models_params,
                                            models_dico, model_name_mapping),
            all_models_params[node],
        )

    else:
        # The composition node has predecessors: first convert the node with
        # everything below it, save the result under the node, then convert
        # what is above (the node being now a regular entry of models_dico)

        G_bellow_and_node = Graph.subgraph([node] + successors)
        G_above = Graph.subgraph(predecessors + [node])

        models_dico[node] = _rec_convert_graph_to_code_OLD2(
            G_bellow_and_node, all_models_params, models_dico,
            model_name_mapping)

        # NOTE(review): this last call goes to _rec_convert_graph_to_code and
        # not to _rec_convert_graph_to_code_OLD2 like the other recursive
        # calls of this function -- confirm whether this is intended.
        return _rec_convert_graph_to_code(G_above, all_models_params,
                                          models_dico, model_name_mapping)
예제 #7
0
def _rec_convert_graph_to_code_OLD(G, all_params):
    """Recursive function to convert a graph into a json representation (oldest variant).

    The graph is split around its first composition node. What is below the
    node is converted first and saved in ``all_params`` under that node, then
    the function recurses on what is above it (node included).

    Parameters
    ----------
    G : directed graph
        Graph to convert; ``nodes`` and ``subgraph`` are used here
        (networkx-style API). The nodes are indexed as ``node[0]`` and
        ``node[1][1]`` below, so they are presumably nested tuples
        -- TODO confirm.
    all_params : dict
        Indexed by node. Updated in place for the composition nodes.

    Returns
    -------
    One of:

    * ``{}`` when the graph is empty;
    * ``(SpecialModels.GraphPipeline, {"models": ..., "edges": ...})`` when
      there is more than one node and no composition node, or nothing below it;
    * ``(node[1][1], params)`` when the graph has a single node;
    * otherwise the result of the conversion of the part above the node.

    Raises
    ------
    ValueError
        If predecessors, the composition node and successors do not form a
        partition of the nodes of the graph.
    """
    if len(G.nodes) == 0:
        return {}

    ### 1) Find First composition node
    # NOTE: 'node' is still read after this loop. It is the composition node
    # when one was found, otherwise the last node given by gh.iter_graph.
    has_composition = False
    for node in gh.iter_graph(G):
        if StepCategories.is_composition_step(node[0]):
            has_composition = True
            break

    # Without composition node the graph can be returned directly
    return_gpipe = not has_composition

    if has_composition:
        ### If there is a composition node, I need to split between what is above and what is below
        predecessors = gh.get_all_predecessors(G, node)
        successors = gh.get_all_successors(G, node)

        # Every node of the graph must be either above, below, or the node itself
        if not gh.is_it_a_partition(list(G.nodes),
                                    [predecessors, [node], successors]):
            raise ValueError("Incorrect graph, wrong split around node %s" %
                             str(node))

        if len(successors) == 0:
            # If nothing below, I'll be able to return something
            return_gpipe = True

    if return_gpipe:

        if len(G.nodes) > 1:
            ### I'll create a GraphPipeline object

            edges = gh.edges_from_graph(G)

            model_name_mapping = _create_name_mapping(list(G.nodes))
            # each node in graph will be mapped to a name within the GraphPipeline

            models = {model_name_mapping[n]: all_params[n] for n in G.nodes}

            # Rename the nodes of each edge with their name in the GraphPipeline
            edges = [
                tuple((model_name_mapping[e] for e in edge)) for edge in edges
            ]

            return (SpecialModels.GraphPipeline, {
                "models": models,
                "edges": edges
            })

        else:
            ### Otherwise it is just the model_name with its parameters
            # 'node' comes from the loop above; the graph has a single node here
            return node[1][1], all_params[list(G.nodes)[0]]

    # From here a composition node with something below it was found
    G_above = G.subgraph(predecessors + [node])
    G_bellow = G.subgraph(successors)

    connected_Gbellow = gh.get_connected_graphs(G_bellow)
    if len(connected_Gbellow) == 1:
        # what is below is a 'connected graph' : it means that the composition node should be applied to One model
        all_params[node] = _rec_convert_graph_to_code_OLD(G_bellow, all_params)

    else:
        # otherwise, the composition will be applied to a list of models
        all_params[node] = [
            _rec_convert_graph_to_code_OLD(g, all_params)
            for g in connected_Gbellow
        ]

    # The part below is now stored under 'node': convert what is above it
    return _rec_convert_graph_to_code_OLD(G_above, all_params)