Beispiel #1
0
def assert_model_graph_structure(G):
    """Verify that the graph describing a model has a valid structure.

    The checks are run in this order:

    1. the graph has exactly one terminal node,
    2. the graph is connected,
    3. the graph has no cycle,
    4. every composition node has at least one direct successor,
    5. for every composition node, the three groups
       (all its successors, the node itself, all its predecessors)
       form a partition of the nodes of the graph.

    Parameters
    ----------
    G : graph
        Graph whose nodes are indexable; ``node[0]`` is what gets passed to
        ``StepCategories.is_composition_step``.

    Raises
    ------
    ValueError
        As soon as one of the checks above fails.
    """

    # only one terminal node
    if len(gh.get_terminal_nodes(G)) != 1:
        raise ValueError("I should have only one terminal node")

    # connected graph
    if not gh.is_connected(G):
        raise ValueError("the graph should be connected")

    # no cycle
    if gh.has_cycle(G):
        raise ValueError("The graph shouldn't have any cycle")

    composition_nodes = [
        node for node in G.nodes
        if StepCategories.is_composition_step(node[0])
    ]

    # The two passes are kept separate on purpose: every "no successor" error
    # is reported before any "incorrect split" error.
    for node in composition_nodes:
        if not list(G.successors(node)):
            # Nodes are tuples: they must go through str() first, otherwise
            # the '%' operator would unpack the tuple and raise a TypeError.
            raise ValueError("Composition node %s has no successor" %
                             str(node))

    for node in composition_nodes:
        successors = gh.get_all_successors(G, node)
        predecessors = gh.get_all_predecessors(G, node)

        if not gh.is_it_a_partition(list(G.nodes),
                                    [successors, [node], predecessors]):
            raise ValueError("Incorrect split around composition node %s" %
                             str(node))
Beispiel #2
0
def _find_first_composition_node(Graph):
    """Return the first composition node met while iterating over the graph.

    Nodes are visited in the order given by ``gh.iter_graph``; a node is a
    composition node when ``StepCategories.is_composition_step(node[0])`` is
    true. Returns ``None`` when the graph has no composition node.
    """
    composition_nodes = (
        candidate for candidate in gh.iter_graph(Graph)
        if StepCategories.is_composition_step(candidate[0])
    )
    # next() with a default stops at the first match, or yields None
    return next(composition_nodes, None)
Beispiel #3
0
def _find_first_composition_node(Graph, composition_already_done=None):
    """Return the first composition node that has not been handled yet.

    Nodes are visited in the order given by ``gh.iter_graph``. Composition
    nodes contained in ``composition_already_done`` are skipped; when that
    argument is ``None`` nothing is skipped. Returns ``None`` when no
    suitable composition node exists.
    """
    already_done = (set() if composition_already_done is None else
                    composition_already_done)

    remaining = (
        candidate for candidate in gh.iter_graph(Graph)
        if StepCategories.is_composition_step(candidate[0])
        and candidate not in already_done
    )
    return next(remaining, None)
Beispiel #4
0
def graphviz_modelgraph(G):
    """Create a graphviz graph of the model that can be plotted.

    Composition nodes are drawn in light green, the other nodes in light
    blue. Edges leaving a composition node are drawn in light green, the
    other edges in black.

    Remark: a graphviz object can be displayed directly in an interactive
    environment such as IPython or Jupyter.

    Parameters
    ----------
    G : networkx graph
        Graph of the model. Each node is indexed as ``node[0]`` (step name)
        and ``node[1][0]`` / ``node[1][1]`` (used to build the label).

    Returns
    -------
    graphviz.Digraph if ``G`` is an ``nx.DiGraph``, else graphviz.Graph

    Raises
    ------
    ValueError
        If graphviz is not installed.
    """
    if graphviz is None:
        raise ValueError("You need to install graphviz")

    if isinstance(G, nx.DiGraph):
        G2 = graphviz.Digraph()
    else:
        G2 = graphviz.Graph()

    def new_n(node):
        # label of a node in the graphviz graph
        return (node[1][0], node[1][1])

    node_compo = []
    node_other = []
    for node in G.nodes:
        if StepCategories.is_composition_step(node[0]):
            node_compo.append(node)
        else:
            node_other.append(node)

    # Membership is tested on the ORIGINAL nodes: edges carry original nodes,
    # not relabelled ones, so testing against the labels would never match.
    compo_set = set(node_compo)

    G2.attr("node", color="lightgreen")
    for node in node_compo:
        G2.node(str(new_n(node)))

    G2.attr("node", color="lightblue")
    for node in node_other:
        G2.node(str(new_n(node)))

    for e1, e2 in G.edges():
        edge_color = "lightgreen" if e1 in compo_set else "black"
        # 'attr' sets the default attributes of the edges created afterwards
        G2.attr("edge", color=edge_color, penwidth="1.0")
        G2.edge(str(new_n(e1)), str(new_n(e2)))

    G2.node_attr.update(style="filled")

    return G2
Beispiel #5
0
def model_graph_plot(Graph, ax=None):
    """Draw the graph of a model with matplotlib.

    Composition nodes are drawn with colour ``"y"``, the other nodes with
    colour ``"r"``, and every node gets a label.

    Parameters
    ----------
    Graph : networkx graph
        Graph whose nodes are ``(step_name, model_name)`` pairs.

    ax : matplotlib axes or None
        Axes to draw on; the current axes are used when None.
        The axes are cleared before drawing.

    Returns
    -------
    the axes the graph was drawn on

    Raises
    ------
    ValueError
        If matplotlib is not installed.
    """
    if plt is None:
        raise ValueError("Please install matplotlib")

    axes = plt.gca() if ax is None else ax
    axes.cla()

    layout = nx.spring_layout(Graph)

    composition_nodes, other_nodes = [], []
    for step_name, model_name in Graph.nodes:
        if StepCategories.is_composition_step(step_name):
            bucket = composition_nodes
        else:
            bucket = other_nodes
        bucket.append((step_name, model_name))

    # one drawing pass per group of nodes, each with its own colour
    for colour, members in (("y", composition_nodes), ("r", other_nodes)):
        nx.draw(Graph, pos=layout, ax=axes, node_color=colour,
                nodelist=members)
    nx.draw_networkx_labels(Graph, pos=layout, ax=axes)

    return axes
Beispiel #6
0
def is_composition_model(name):
    """Tell whether ``name`` designates a composition model.

    Only the first item of ``name`` is looked at; it is passed to
    ``StepCategories.is_composition_step``.
    """
    step_name = name[0]
    return StepCategories.is_composition_step(step_name)
Beispiel #7
0
def add_columns_selector(Graph, var_type_node_dico, var_type_columns_dico,
                         all_models_params):
    """Restrict the entry models of the graph to the columns they work on.

    Only the starting nodes of the graph stripped of its composition nodes
    are considered. For each of them that has a variable type:

    * either a new selector node is inserted above it in the graph
      (when ``_must_include_selector`` says so),
    * or a 'columns_to_use' entry is added to its own parameters.

    Nodes whose variable type is None are left untouched.

    Parameters
    ----------
    Graph : nx.DiGraph
        Graph representing the model.

    var_type_node_dico : dict
        Which node works on which type of variable.
        keys = node of Graph (ie: name of models)
        values = variable type (or None)

    var_type_columns_dico : dict
        Which columns belong to which type of variable.
        keys = variable type
        values = list of columns

    all_models_params : dict
        Parameters of each model.
        keys = node of Graph (ie: name of models)
        values = parameters of the model

        MODIFIED IN PLACE to include 'columns_to_use'.

    Returns
    -------
    modified Graph
    modified all_models_params

    Raises
    ------
    ValueError
        If the selector node to insert is already in the graph.
    """
    regular_nodes = [
        candidate for candidate in Graph.nodes
        if not StepCategories.is_composition_step(candidate[0])
    ]
    entry_nodes = gh.get_starting_nodes(Graph.subgraph(regular_nodes))

    for entry in entry_nodes:
        var_type = var_type_node_dico[entry]
        needs_selector = _must_include_selector(entry[1])

        if var_type is None:
            # No variable type: the model applies to every column, so there
            # is nothing to select whether or not it would need a selector.
            continue

        if not needs_selector:
            # The model gets the columns through its own parameters
            all_models_params[entry]["columns_to_use"] = _get_columns(
                var_type, var_type_columns_dico)
            continue

        # A dedicated selector node has to be placed above the model
        selector_name = "Selector_%s" % unnest_tuple(var_type)
        selector_node = (selector_name, (selector_name,
                                         SpecialModels.ColumnsSelector))
        if selector_node in Graph.nodes:
            raise ValueError("Please check, I have duplicate names : %s" %
                             str(selector_node))

        Graph = gh.insert_node_above(Graph, entry, new_node=selector_node)

        selected_columns = _get_columns(var_type, var_type_columns_dico)
        all_models_params[selector_node] = {"columns_to_use": selected_columns}

    return Graph, all_models_params
Beispiel #8
0
def create_graphical_representation(steps):
    """Build the graph of a model from its steps.

    Non-composition steps are added one by one, in the order of ``steps``,
    each one being plugged on the terminal node(s) of the graph built so far
    according to its variable type. Composition steps are then put on top of
    the graph, and the resulting structure is verified.

    Parameters
    ----------
    steps : OrderedDict
        keys = (step_name, model_name) pairs, which become the nodes
        values = variable type of the step

    Returns
    -------
    G : nx.DiGraph
        Graph of the model.

    new_steps : OrderedDict
        Selector nodes that had to be created on the fly.
        keys = new node
        values = variable types handled by that node

    Raises
    ------
    ValueError
        If a node is added twice, if a step is missing from the final graph,
        if the final graph has a node that is neither in ``steps`` nor in
        ``new_steps``, or if ``assert_model_graph_structure`` rejects the
        graph.
    """

    # Remark : the step number should probably be put in the graph,
    # along with correct labels,
    # so that several nodes can share the same name (Ex : Scaler...)

    ### 1) Split Composition Steps vs Rest
    all_composition_steps = []
    all_others = []
    for (step_name, model_name), var_type in steps.items():
        if StepCategories.is_composition_step(step_name):
            all_composition_steps.append((step_name, model_name, var_type))
        else:
            all_others.append((step_name, model_name, var_type))

    ### 2) Create Graph for non-composition step
    new_steps = OrderedDict()

    G = nx.DiGraph()
    for step_name, model_name, var_type in all_others:

        unested_var_type = unnest_tuple(var_type)

        terminal_nodes = gh.get_terminal_nodes(
            G
        )  # Terminal nodes : I'll add the new step on one (or more) of those

        # variable type of each terminal node -> that terminal node
        ending_node_type = {
            unnest_tuple(steps[node]): node
            for node in terminal_nodes
        }

        node_name = (step_name, model_name)  # 2-tuple
        if node_name in G.nodes:
            raise ValueError("This node already exists '(%s,%s)'" % node_name)

        # 1) Either I attach the new node to ONE terminal node
        # 2) Or I create a new branch (new node attached to nothing)
        # 3) Or I attach it to SEVERAL terminal nodes

        elif unested_var_type in ending_node_type:
            ### 1) I already have a branch of this type
            last_node = ending_node_type[unested_var_type]
            G = gh.add_node_after(G, node_name, last_node)

        ### I don't have a branch ###
        else:
            # NOTE(review): presumably keeps the terminal nodes whose type is
            # included in the type of the new step -- confirm 'tuple_include'
            all_candidates = [(t, n) for t, n in ending_node_type.items()
                              if tuple_include(t, unested_var_type)]
            # I need to look where I want to plug it #
            if len(all_candidates) == 0:
                ### 2) I must create a new branch : no node to attach to ###
                G = gh.add_node_after(G, node_name)
            else:
                ### 3) I attach it to several nodes

                ### Here : a node sometimes has to be added UPSTREAM, when some types have not been added yet
                types_added = unnest_tuple([t for t, n in all_candidates])
                # NOTE(review): presumably the types of the new step not
                # covered by the candidates -- confirm 'diff'
                types_not_added = diff(unested_var_type, types_added)
                if len(types_not_added) > 0:

                    name_of_cat = "Selector_%s" % unnest_tuple(types_not_added)
                    new_node = (name_of_cat, (name_of_cat,
                                              SpecialModels.ColumnsSelector))

                    G = gh.add_node_after(G, new_node)

                    new_steps[
                        new_node] = types_not_added  # I also must dynamically add the node to the list of steps

                    all_candidates = all_candidates + [
                        (types_not_added, new_node)
                    ]

                G = gh.add_node_after(G, node_name,
                                      *[n for t, n in all_candidates])

    ### 3) Include composition node on top
    # Reversed order: each composition step is linked to the current starting
    # nodes, which makes it the new starting node for the next one.
    for step_name, model_name, _ in reversed(all_composition_steps):
        starting_nodes = gh.get_starting_nodes(G)
        for n in starting_nodes:
            G.add_edge((step_name, model_name), n)

    ### 4) Verify the Graph structure

    # all steps are in the graph
    for (step_name, model_name), _ in steps.items():
        if (step_name, model_name) not in G:
            raise ValueError("'(%s , %s)' should be in graph" %
                             (step_name, model_name))
    # all nodes were in the steps
    for node in G.nodes():
        if node not in steps and node not in new_steps:
            raise ValueError("'(%s,%s)' shouldn't be in graph" % node)

    assert_model_graph_structure(G)

    return G, new_steps
Beispiel #9
0
def _rec_convert_graph_to_code_OLD(G, all_params):
    """Recursively convert a graph into a json-like representation.

    Parameters
    ----------
    G : graph
        Graph (or sub-graph) of the model to convert.

    all_params : dict
        keys = node of the graph
        values = parameters of the node

        MODIFIED IN PLACE : the entry of each composition node that has
        something below it is replaced by the converted representation of
        what is below it.

    Returns
    -------
    * ``{}`` if the graph has no node,
    * ``(SpecialModels.GraphPipeline, {"models": ..., "edges": ...})`` when
      the graph has several nodes and either no composition node, or a first
      composition node with nothing below it,
    * ``(node[1][1], parameters of the only node)`` when the graph has one
      node,
    * otherwise the conversion of the part of the graph that is above the
      first composition node, that node included.

    Raises
    ------
    ValueError
        If the nodes above the first composition node, the node itself and
        the nodes below it do not form a partition of the graph.
    """
    if len(G.nodes) == 0:
        return {}

    ### 1) Find First composition node
    # NOTE: 'node' is deliberately used after the loop : it is the first
    # composition node when there is one, the last node iterated otherwise.
    has_composition = False
    for node in gh.iter_graph(G):
        if StepCategories.is_composition_step(node[0]):
            has_composition = True
            break

    return_gpipe = not has_composition

    if has_composition:
        ### If there is a composition node, I need to split between what is above and what is below
        predecessors = gh.get_all_predecessors(G, node)
        successors = gh.get_all_successors(G, node)

        if not gh.is_it_a_partition(list(G.nodes),
                                    [predecessors, [node], successors]):
            raise ValueError("Incorrect graph, wrong split around node %s" %
                             str(node))

        if len(successors) == 0:
            # If nothing below, I'll be able to return something
            return_gpipe = True

    if return_gpipe:

        if len(G.nodes) > 1:
            ### I'll create a GraphPipeline object

            edges = gh.edges_from_graph(G)

            model_name_mapping = _create_name_mapping(list(G.nodes))
            # each node in graph will be mapped to a name within the GraphPipeline

            models = {model_name_mapping[n]: all_params[n] for n in G.nodes}

            # same edges, expressed with the names used within the GraphPipeline
            edges = [
                tuple((model_name_mapping[e] for e in edge)) for edge in edges
            ]

            return (SpecialModels.GraphPipeline, {
                "models": models,
                "edges": edges
            })

        else:
            ### Otherwise it is just the model_name with its parameters
            # the graph has one node here, so 'node' is that node
            return node[1][1], all_params[list(G.nodes)[0]]

    # The composition node stays in the upper part; what is below it becomes
    # its parameters.
    G_above = G.subgraph(predecessors + [node])
    G_bellow = G.subgraph(successors)

    connected_Gbellow = gh.get_connected_graphs(G_bellow)
    if len(connected_Gbellow) == 1:
        # what is below is a 'connected graph' : it means that the composition node should be applied to One model
        all_params[node] = _rec_convert_graph_to_code_OLD(G_bellow, all_params)

    else:
        # otherwise, the composition will be applied to a list of models
        all_params[node] = [
            _rec_convert_graph_to_code_OLD(g, all_params)
            for g in connected_Gbellow
        ]

    return _rec_convert_graph_to_code_OLD(G_above, all_params)