Beispiel #1
0
def assert_model_graph_structure(G):
    """ verification on the structure of the graph

    The checks are done in this order, the first failing one raises:

    * exactly one terminal node
    * the graph is connected
    * the graph has no cycle
    * every composition node has at least one direct successor
    * every composition node splits the graph into exactly
      (all its predecessors, itself, all its successors)

    Parameters
    ----------
    G : graph
        object exposing `nodes` and `successors`
        (presumably a networkx DiGraph -- TODO confirm).
        Each node must be indexable: `node[0]` is what is given to
        `StepCategories.is_composition_step`.

    Raises
    ------
    ValueError
        if one of the checks above fails
    """

    # only one terminal node
    if len(gh.get_terminal_nodes(G)) != 1:
        raise ValueError("I should have only one terminal node")

    # connected graph
    if not gh.is_connected(G):
        raise ValueError("the graph should be connected")

    # no cycle
    if gh.has_cycle(G):
        raise ValueError("The graph shouldn't have any cycle")

    composition_nodes = [
        node for node in G.nodes
        if StepCategories.is_composition_step(node[0])
    ]

    # First pass : a composition node must have something below it.
    # Kept separate from the second pass so that this error is reported first.
    for node in composition_nodes:
        if len(list(G.successors(node))) == 0:
            # nodes are indexable (tuples) : wrapping in a 1-tuple prevents '%'
            # from unpacking the node itself as the formatting arguments
            raise ValueError("Composition node %s has no successor" %
                             (node, ))

    # Second pass : nothing may live 'beside' a composition node
    all_nodes = list(G.nodes)
    for node in composition_nodes:
        successors = gh.get_all_successors(G, node)
        predecessors = gh.get_all_predecessors(G, node)

        if not gh.is_it_a_partition(all_nodes,
                                    [successors, [node], predecessors]):
            raise ValueError("Incorrect split around composition node %s" %
                             (node, ))
Beispiel #2
0
def test_all_graphs_functions():
    """ check terminal/starting nodes and the transitive successors/predecessors on a small graph

    The assertions below expect two chains, 1 -> 2 -> 3 and 4 -> 5, that both end in node 6.
    """
    G = nx.DiGraph()
    # each tuple is : (new node, *nodes after which it is added)
    for node_and_parents in [(1, ), (2, 1), (3, 2), (4, ), (5, 4), (6, 5, 3)]:
        G = add_node_after(G, *node_and_parents)

    assert set(get_terminal_nodes(G)) == {6}
    assert set(get_starting_nodes(G)) == {1, 4}

    expected_successors = {
        1: {2, 3, 6},
        2: {3, 6},
        3: {6},
        4: {5, 6},
        5: {6},
        6: set(),
    }
    for node, expected in expected_successors.items():
        assert set(get_all_successors(G, node)) == expected

    expected_predecessors = {
        1: set(),
        2: {1},
        3: {1, 2},
        4: set(),
        5: {4},
        6: {1, 2, 3, 4, 5},
    }
    for node, expected in expected_predecessors.items():
        assert set(get_all_predecessors(G, node)) == expected
Beispiel #3
0
    def get_subpipeline(self, end_node, deepcopy_models=False):
        """ create a new model that corresponds to the original GraphPipeline but with a new ending node

        If the original GraphPipeline was fitted, the new model will also be fitted

        Parameters
        ----------

        end_node : str
            the name of the node at which the new pipeline will stop. Must be in the Graph

        deepcopy_models : boolean, default=False
            if True will make a deepcopy of the models.

        Returns
        -------
        new GraphPipeline instance, or, when `end_node` has no predecessor,
        the model stored for `end_node` itself (not wrapped in a GraphPipeline)

        Raises
        ------
        ValueError
            if `end_node` isn't a node of the complete graph

        """
        self._complete_init()

        if end_node not in self.complete_graph:
            raise ValueError("the node '%s' isn't in the original graph" %
                             end_node)

        # get all predecessors of the node => to include in the graph
        predecessors = list(
            get_all_predecessors(self.complete_graph, end_node))

        # Remark : this could be separated into a submethod creating a subpipeline from a list of nodes

        if not predecessors:
            # Only 'end_node' remains : the model is returned directly.
            # 'deepcopy_models' is honored here as well, so that the caller never
            # shares the model with this pipeline when a copy was requested.
            model = self._models[end_node]
            return deepcopy(model) if deepcopy_models else model

        nodes_to_keep = predecessors + [end_node]  # list : fixes the order of 'models'
        kept = set(nodes_to_keep)  # set : constant-time membership tests below

        # filter edges
        # This is done instead of :
        # complete_graph_sub.subgraph(nodes_to_keep).edges
        # because that way the order of the edges is preserved, which handles the concatenation order
        edges_to_keep = [(e1, e2)
                         for e1, e2 in get_two_by_two_edges(*self._edges)
                         if e1 in kept and e2 in kept]

        # Retrieve sklearn model
        if deepcopy_models:
            models = {
                node: deepcopy(self._models[node])
                for node in nodes_to_keep
            }
        else:
            models = {node: self._models[node] for node in nodes_to_keep}

        # Change 'no_concat_nodes' : restricted to the kept nodes, same container type,
        # and None when nothing is left
        if self.no_concat_nodes is None:
            no_concat_nodes = None
        else:
            no_concat_nodes = type(self.no_concat_nodes)(
                [n for n in self.no_concat_nodes if n in kept])
            if len(no_concat_nodes) == 0:
                no_concat_nodes = None

        ###############################
        ###   Create new pipeline   ###
        ###############################
        sub_pipeline = GraphPipeline(models=models,
                                     edges=edges_to_keep,
                                     verbose=self.verbose,
                                     no_concat_nodes=no_concat_nodes)

        # Internal modification to change the state
        if self._preparation_done:
            sub_pipeline._complete_init()

        if not self._already_fitted:
            return sub_pipeline

        # here the pipeline was fitted : the fitted state is propagated to the sub-pipeline
        sub_pipeline._already_fitted = True
        sub_pipeline._Xinput_features = deepcopy(
            self._Xinput_features)  # copy just to be safe
        sub_pipeline._all_concat_order = dico_key_filter(
            self._all_concat_order, lambda n: n in kept)
        sub_pipeline._all_concat_type = dico_key_filter(
            self._all_concat_type, lambda n: n in kept)

        return sub_pipeline
Beispiel #4
0
def _rec_convert_graph_to_code_OLD2(Graph,
                                    all_models_params,
                                    models_dico,
                                    model_name_mapping=None):
    """ recursive function used to convert a Graph into a json code

    See convert_graph_to_code

    Parameters
    ----------
    Graph : graph
        the (sub-)graph to convert

    all_models_params : dict
        indexed by node, used for the parameters of the composition nodes

    models_dico : dict
        indexed by node, the representation to use for each node.
        It is modified in place : the entry of a composition node that has
        predecessors is replaced by the conversion of what is below it.

    model_name_mapping : dict or None, default=None
        mapping node -> name within a GraphPipeline,
        created from the nodes of `Graph` when None and needed

    Returns
    -------
    * `models_dico[node]` if the graph has one node
    * a tuple (SpecialModels.GraphPipeline, {"models": ..., "edges": ...})
      if there is no composition node, or nothing below the first one
    * otherwise a tuple (klass of the composition node, sub-model or list of sub-models, params)

    Raises
    ------
    ValueError
        if the first composition node doesn't split the graph into
        (predecessors, node, successors)
    """

    ### ** only one node in Graph : I'll return what was saved in models_dico ** ###
    if len(Graph.nodes) == 1:
        node = list(Graph.nodes)[0]
        return models_dico[node]

    node = _find_first_composition_node(Graph)

    if node is not None:
        # list(...) : both are concatenated with '[node]' further down
        predecessors = list(gh.get_all_predecessors(Graph, node))
        successors = list(gh.get_all_successors(Graph, node))

        if not gh.is_it_a_partition(list(Graph.nodes),
                                    [predecessors, [node], successors]):
            raise ValueError("Incorrect graph, wrong split around node %s" %
                             str(node))
    else:
        predecessors = []
        successors = []

    if node is None or len(successors) == 0:
        ### ** It means I'll return a GraphPipeline ** ###
        edges = gh.edges_from_graph(Graph)

        if model_name_mapping is None:
            model_name_mapping = _create_name_mapping(list(Graph.nodes))
        # each node in graph will be mapped to a name within the GraphPipeline

        models = {model_name_mapping[n]: models_dico[n] for n in Graph.nodes}

        edges = [
            tuple((model_name_mapping[e] for e in edge)) for edge in edges
        ]

        return (SpecialModels.GraphPipeline, {
            "models": models,
            "edges": edges
        })

    Graph_bellow = Graph.subgraph(successors)

    connected_Gbellow = gh.get_connected_graphs(Graph_bellow)

    if len(predecessors) == 0 and len(connected_Gbellow) > 1:
        # composition node at the top, applied to several independent sub-graphs
        return (
            _klass_from_node(node),
            [
                _rec_convert_graph_to_code_OLD2(Gb, all_models_params,
                                                models_dico,
                                                model_name_mapping)
                for Gb in connected_Gbellow
            ],
            all_models_params[node],
        )

    elif len(predecessors) == 0 and len(connected_Gbellow) == 1:
        # composition node at the top, applied to one sub-graph
        return (
            _klass_from_node(node),
            _rec_convert_graph_to_code_OLD2(Graph_bellow, all_models_params,
                                            models_dico, model_name_mapping),
            all_models_params[node],
        )

    else:
        # Something is above the composition node :
        # 1) the node and what is below it are collapsed into 'models_dico[node]'
        # 2) what is above (ending with the node) is then converted
        G_bellow_and_node = Graph.subgraph([node] + successors)
        G_above = Graph.subgraph(predecessors + [node])

        models_dico[node] = _rec_convert_graph_to_code_OLD2(
            G_bellow_and_node, all_models_params, models_dico,
            model_name_mapping)

        # recurse into this same function, like every other recursive call here
        # (it used to jump to '_rec_convert_graph_to_code')
        return _rec_convert_graph_to_code_OLD2(G_above, all_models_params,
                                               models_dico,
                                               model_name_mapping)
Beispiel #5
0
def _rec_convert_graph_to_code_OLD(G, all_params):
    """ recursive function to convert a graph into a json representation

    Parameters
    ----------
    G : graph
        the (sub-)graph to convert

    all_params : dict
        indexed by node. It is modified in place : the entry of a composition
        node that has successors is replaced by the conversion of what is below it.

    Returns
    -------
    * {} if the graph has no node
    * a tuple (SpecialModels.GraphPipeline, {"models": ..., "edges": ...}) if the graph
      has several nodes and no composition node (or nothing below the first one)
    * a tuple (node[1][1], all_params[node]) if the graph has exactly one node

    Raises
    ------
    ValueError
        if the first composition node doesn't split the graph into
        (predecessors, node, successors)
    """
    if len(G.nodes) == 0:
        return {}

    ### 1) Find first composition node (None if there isn't any)
    node = None
    for candidate in gh.iter_graph(G):
        if StepCategories.is_composition_step(candidate[0]):
            node = candidate
            break

    predecessors = []
    successors = []
    if node is not None:
        ### If there is a composition node, I need to split between what is above and what is below
        predecessors = gh.get_all_predecessors(G, node)
        successors = gh.get_all_successors(G, node)

        if not gh.is_it_a_partition(list(G.nodes),
                                    [predecessors, [node], successors]):
            raise ValueError("Incorrect graph, wrong split around node %s" %
                             str(node))

    # No composition node, or nothing below it : I'm able to return something
    if node is None or len(successors) == 0:

        if len(G.nodes) > 1:
            ### I'll create a GraphPipeline object

            edges = gh.edges_from_graph(G)

            model_name_mapping = _create_name_mapping(list(G.nodes))
            # each node in graph will be mapped to a name within the GraphPipeline

            models = {model_name_mapping[n]: all_params[n] for n in G.nodes}

            edges = [
                tuple((model_name_mapping[e] for e in edge)) for edge in edges
            ]

            return (SpecialModels.GraphPipeline, {
                "models": models,
                "edges": edges
            })

        ### Otherwise it is just the model_name with its parameters.
        # The only node of the graph is taken explicitly, instead of relying on
        # whatever the search loop above left behind in its loop variable.
        only_node = list(G.nodes)[0]
        return only_node[1][1], all_params[only_node]

    G_above = G.subgraph(predecessors + [node])
    G_bellow = G.subgraph(successors)

    connected_Gbellow = gh.get_connected_graphs(G_bellow)
    if len(connected_Gbellow) == 1:
        # what is below is a 'connected graph' : it means that the composition node should be applied to one model
        all_params[node] = _rec_convert_graph_to_code_OLD(G_bellow, all_params)

    else:
        # otherwise, the composition will be applied to a list of models
        all_params[node] = [
            _rec_convert_graph_to_code_OLD(g, all_params)
            for g in connected_Gbellow
        ]

    # what is below is now stored in 'all_params[node]' : convert what is above
    return _rec_convert_graph_to_code_OLD(G_above, all_params)