예제 #1
0
def test_all_graphs_functions():
    """Exercise the graph helpers on a small two-branch graph.

    The graph built is ``1 -> 2 -> 3 -> 6`` and ``4 -> 5 -> 6``: two starting
    nodes (1 and 4) merging into a single terminal node (6).
    """
    # (node, parents) pairs, inserted in this exact order
    construction_plan = [
        (1, ()),
        (2, (1, )),
        (3, (2, )),
        (4, ()),
        (5, (4, )),
        (6, (5, 3)),
    ]

    G = nx.DiGraph()
    for new_node, parents in construction_plan:
        G = add_node_after(G, new_node, *parents)

    assert set(get_terminal_nodes(G)) == {6}
    assert set(get_starting_nodes(G)) == {1, 4}

    # every node reachable downstream of the key
    expected_successors = {
        1: {2, 3, 6},
        2: {3, 6},
        3: {6},
        4: {5, 6},
        5: {6},
        6: set(),
    }
    for start, expected in expected_successors.items():
        assert set(get_all_successors(G, start)) == expected

    # every node reachable upstream of the key
    expected_predecessors = {
        1: set(),
        2: {1},
        3: {1, 2},
        4: set(),
        5: {4},
        6: {1, 2, 3, 4, 5},
    }
    for start, expected in expected_predecessors.items():
        assert set(get_all_predecessors(G, start)) == expected
예제 #2
0
def assert_model_graph_structure(G):
    """Check that ``G`` is a valid model graph, raising ``ValueError`` otherwise.

    The checks run in this order:

    1. the graph has exactly one terminal node;
    2. the graph is connected;
    3. the graph has no cycle;
    4. every composition node has at least one successor, and each of those
       successors has the composition node as its only predecessor.
    """
    n_terminal = len(gh.get_terminal_nodes(G))
    if n_terminal != 1:
        raise ValueError("I should have only one terminal node")

    if not gh.is_connected(G):
        raise ValueError("the graph should be connected")

    if gh.has_cycle(G):
        raise ValueError("The graph shouldn't have any cycle")

    for node in G.nodes:
        if not is_composition_model(node):
            continue

        children = list(G.successors(node))
        if len(children) == 0:
            raise ValueError("Composition node %s has no successor" %
                             str(node))

        # A child of a composition node may only be fed by that node
        for child in children:
            parents = list(G.predecessors(child))
            if parents != [node]:
                raise ValueError(
                    "The node %s has more than one parent, which is impossible for a child of a composition node (%s)"
                    % (str(child), str(node)))
예제 #3
0
def assert_model_graph_structure(G):
    """Check that ``G`` is a valid model graph, raising ``ValueError`` otherwise.

    The checks run in this order:

    1. the graph has exactly one terminal node;
    2. the graph is connected;
    3. the graph has no cycle;
    4. every composition node (a node whose first item ``node[0]`` is a
       composition step) has at least one successor;
    5. for every composition node, its successors (all of them, transitively),
       the node itself and its predecessors (transitively) are accepted by
       ``gh.is_it_a_partition`` against the full list of nodes.

    Raises
    ------
    ValueError
        As soon as one of the checks above fails.
    """

    # only one terminal node
    if len(gh.get_terminal_nodes(G)) != 1:
        raise ValueError("I should have only one terminal node")

    # connected graph
    if not gh.is_connected(G):
        raise ValueError("the graph should be connected")

    # no cycle
    if gh.has_cycle(G):
        raise ValueError("The graph shouldn't have any cycle")

    # First pass: every composition node needs a successor. Kept as a
    # separate loop so this error wins over the partition error below.
    for node in G.nodes:
        if StepCategories.is_composition_step(node[0]):
            if len(list(G.successors(node))) == 0:
                # Nodes are indexable (typically tuples): wrap in a 1-tuple so
                # '%' formats the node itself instead of unpacking it.
                raise ValueError("Composition node %s has no successor" %
                                 (node, ))

    # Second pass: a composition node must split the graph cleanly
    for node in G.nodes:
        if StepCategories.is_composition_step(node[0]):
            successors = gh.get_all_successors(G, node)
            predecessors = gh.get_all_predecessors(G, node)

            if not gh.is_it_a_partition(list(G.nodes),
                                        [successors, [node], predecessors]):
                raise ValueError("Incorrect split around composition node %s" %
                                 (node, ))
예제 #4
0
    def _verif_graph_structure(self):
        """Validate ``self.complete_graph`` and the models attached to it.

        Raises
        ------
        ValueError
            If the graph does not have exactly one terminal node, is not
            connected (once made undirected), or contains a cycle.
        TypeError
            If the terminal model has no ``fit`` method, or if any other model
            lacks one of ``fit``, ``fit_transform`` or ``transform``.
        """
        graph = self.complete_graph

        # Exactly one terminal node
        terminal_nodes = get_terminal_nodes(graph)
        n_terminal = len(terminal_nodes)
        if n_terminal != 1:
            raise ValueError("the graph should have only one terminal node, instead i got %d" % n_terminal)

        # Connected
        if not nx.is_connected(graph.to_undirected()):
            raise ValueError("the graph should be connected")

        # Acyclic: find_cycle raising NetworkXNoCycle is the *good* outcome
        try:
            nx.find_cycle(graph)
        except nx.NetworkXNoCycle:
            pass
        else:
            raise ValueError("The graph shouldn't have any cycle")

        # Every step must expose the methods its position requires
        intermediary_methods = ("fit", "fit_transform", "transform")
        for name, model in self._models.items():
            if name in terminal_nodes:
                if not hasattr(model, "fit"):
                    raise TypeError("The terminal step (%s) should have a fit method" % name)
            elif not all(hasattr(model, method) for method in intermediary_methods):
                raise TypeError(
                    "Intermediary step (%s) should have a 'fit','fit_transform' and 'transform' method" % name
                )
예제 #5
0
def _rec_convert_graph_to_code(Graph,
                               all_models_params,
                               models_dico,
                               model_name_mapping=None,
                               composition_already_done=None):
    """Recursive function used to convert a Graph into a json code.

    See convert_graph_to_code

    Each call handles one of three cases:

    * the graph has a single node: the entry of ``models_dico`` for that node
      is returned as-is;
    * ``_find_first_composition_node`` returns ``None``: the whole graph is
      returned as ``(SpecialModels.GraphPipeline, {"models": ..., "edges": ...})``;
    * otherwise: every sub-branch below the composition node is converted
      recursively, the composition node's entry in ``models_dico`` is built
      from those results, the sub-branches are merged into the composition
      node and the function recurses on the merged graph.

    Parameters
    ----------
    Graph :
        graph to convert; the code reads ``nodes``, ``successors`` and
        ``subgraph`` on it.
    all_models_params :
        indexed by node; ``all_models_params[node]`` is placed as the last
        item of a composition node's entry.
    models_dico :
        indexed by node. MUTATED in place: entries of composition nodes and of
        their direct successors are overwritten with the converted code.
    model_name_mapping :
        optional mapping node -> name used inside a GraphPipeline. When None,
        one is created with ``_create_name_mapping`` at the point where a
        GraphPipeline is emitted (local to that call).
    composition_already_done :
        set of composition nodes already handled. MUTATED in place and shared
        with the recursive calls; a fresh set is created when None.

    Raises
    ------
    ValueError
        If the successors found after the terminal nodes of the sub-branches
        of a composition node are not all identical.
    """

    if composition_already_done is None:
        composition_already_done = set()

    # Base case : a single node, its code is already in models_dico
    if len(Graph.nodes) == 1:
        node = list(Graph.nodes)[0]
        return models_dico[node]

    node = _find_first_composition_node(Graph, composition_already_done)

    if node is not None:
        successors = list(Graph.successors(node))
        # NOTE: stripped under 'python -O'; the condition below then also
        # covers the 'composition node without successor' situation
        assert len(successors) > 0

    else:
        successors = []

    if node is None or len(successors) == 0:
        ### ** This means a GraphPipeline is returned ** ###
        # Reached when node is None (there is no composition node left) or
        # when the selected node has no successor

        # NOTE(review): this branch is only entered with an empty 'successors'
        # (it is [] when node is None), so the check below can never raise
        if len(successors) > 0:
            raise ValueError(
                "a composition node should have at most one successor '%s'" %
                str(node))

        # A composition node without successor isn't expected here :
        # 1) either it is an original node => a composition node without successor isn't possible
        # 2) or the node was already handled => it should be in 'composition_already_done'

        edges = gh.edges_from_graph(Graph)

        if model_name_mapping is None:
            model_name_mapping = _create_name_mapping(list(Graph.nodes))
        # each node in graph will be mapped to a name within the GraphPipeline

        models = {model_name_mapping[n]: models_dico[n] for n in Graph.nodes}

        # Rename every node of every edge with its GraphPipeline name
        edges = [
            tuple((model_name_mapping[e] for e in edge)) for edge in edges
        ]

        return (SpecialModels.GraphPipeline, {
            "models": models,
            "edges": edges
        })

    composition_already_done.add(node)  # to prevent looping on the same node

    all_sub_branch_nodes = {}  # successor -> list of the nodes of its sub-branch
    all_terminal_nodes = []  # terminal nodes of all the sub-branches
    for successor in successors:

        # 'node' is passed as already visited to the sub-branch search
        sub_branch_nodes = list(
            gh.subbranch_search(starting_node=successor,
                                Graph=Graph,
                                visited={node}))

        all_sub_branch_nodes[successor] = sub_branch_nodes

        assert successor in sub_branch_nodes

        sub_Graph = Graph.subgraph(sub_branch_nodes)

        all_terminal_nodes += gh.get_terminal_nodes(sub_Graph)

        # The code of the whole sub-branch is stored under its first node
        models_dico[successor] = _rec_convert_graph_to_code(
            sub_Graph,
            all_models_params=all_models_params,
            models_dico=models_dico,
            model_name_mapping=model_name_mapping,
            composition_already_done=composition_already_done,
        )

    # Check : successors (in the full Graph) of each sub-branch terminal node
    all_s = [
        frozenset(Graph.successors(t_node)) for t_node in all_terminal_nodes
    ]
    if len(set(all_s)) != 1:
        # By convention, if we look at the nodes AFTER the composition
        # (ie : the successors of the terminal nodes of the part of the graph that will be merged by the composition)
        # Those nodes should have the same list of successors. Those successors will be the successors of the merged node
        raise ValueError(
            "The successor at the end of the composition node %s are not always the same"
            % str(node))

    if len(successors) == 1:

        # Only one successor of the composition node : its code is passed directly

        models_dico[node] = (_klass_from_node(node),
                             models_dico[successors[0]],
                             all_models_params[node])

    elif len(successors) > 1:

        # Several successors : their codes are passed as a list

        models_dico[node] = (
            _klass_from_node(node),
            [models_dico[successor] for successor in successors],
            all_models_params[node],
        )

    else:
        # NOTE(review): unreachable, an empty 'successors' already returned above
        raise NotImplementedError("can't go there")

    # Now I need to merge 'node' with all the sub-branches
    nodes_mapping = {}
    for successor, sub_branch_nodes in all_sub_branch_nodes.items():
        for n in sub_branch_nodes:
            nodes_mapping[n] = node

    Gmerged = gh.merge_nodes(Graph, nodes_mapping=nodes_mapping)
    # All the nodes of the sub-branches will be 'fused' with 'node' ...
    # Recurse now that the composition node is taken care of

    return _rec_convert_graph_to_code(
        Gmerged,
        all_models_params=all_models_params,
        models_dico=models_dico,
        model_name_mapping=model_name_mapping,
        composition_already_done=composition_already_done,
    )
예제 #6
0
def create_graphical_representation(steps):
    """From an OrderedDict of steps, create the graphical representation of the model.

    Parameters
    ----------
    steps :
        mapping whose keys are ``(step_name, model_name)`` pairs and whose
        values are the variable type(s) the step applies to.

    Returns
    -------
    G : nx.DiGraph
        graph whose nodes are the ``(step_name, model_name)`` keys of
        ``steps``, plus the selector nodes created here.
    new_steps : OrderedDict
        selector nodes created here, mapped to the variable types they cover.

    Raises
    ------
    ValueError
        If a node is added twice, if a step is missing from the graph, or if
        the graph holds a node that is neither in ``steps`` nor ``new_steps``.
        ``assert_model_graph_structure`` is also called on the final graph.
    """

    # Remark : the step number should a priori be stored in the graph,
    # together with correct labels,
    # so that several nodes can share the same name (Ex : Scaler...)

    ### 1) Split Composition Steps vs Rest
    all_composition_steps = []
    all_others = []
    for (step_name, model_name), var_type in steps.items():
        if StepCategories.is_composition_step(step_name):
            all_composition_steps.append((step_name, model_name, var_type))
        else:
            all_others.append((step_name, model_name, var_type))

    ### 2) Create Graph for non-composition step
    new_steps = OrderedDict()

    G = nx.DiGraph()
    for step_name, model_name, var_type in all_others:

        unested_var_type = unnest_tuple(var_type)

        terminal_nodes = gh.get_terminal_nodes(
            G
        )  # Terminal nodes : the new step is attached to one (or more) of those

        # variable type(s) of each terminal node -> that terminal node
        ending_node_type = {
            unnest_tuple(steps[node]): node
            for node in terminal_nodes
        }

        node_name = (step_name, model_name)  # 2-uple
        if node_name in G.nodes:
            raise ValueError("This node already exists '(%s,%s)'" % node_name)

        # Three possibilities :
        # 1) attach the new node to ONE terminal node
        # 2) create a new branch (new node attached to nothing)
        # 3) attach the new node to SEVERAL terminal nodes

        elif unested_var_type in ending_node_type:
            ### 1) I already have a branch of this type
            last_node = ending_node_type[unested_var_type]
            G = gh.add_node_after(G, node_name, last_node)

        ### I don't have a branch ###
        else:
            # terminal nodes kept by tuple_include(t, unested_var_type)
            all_candidates = [(t, n) for t, n in ending_node_type.items()
                              if tuple_include(t, unested_var_type)]
            # I need to look where I want to plug it #
            if len(all_candidates) == 0:
                ### 2) A new branch must be created : no candidate node ###
                G = gh.add_node_after(G, node_name)
            else:
                ### 3) Attach to several nodes

                ### Here : a node sometimes has to be added UPSTREAM, when some types have not been added yet
                types_added = unnest_tuple([t for t, n in all_candidates])
                types_not_added = diff(unested_var_type, types_added)
                if len(types_not_added) > 0:

                    # NOTE(review): if unnest_tuple returns a tuple with more
                    # than one item, this '%' formatting raises TypeError - confirm
                    name_of_cat = "Selector_%s" % unnest_tuple(types_not_added)
                    new_node = (name_of_cat, (name_of_cat,
                                              SpecialModels.ColumnsSelector))

                    G = gh.add_node_after(G, new_node)

                    new_steps[
                        new_node] = types_not_added  # the node must also be dynamically added to the list of steps

                    all_candidates = all_candidates + [
                        (types_not_added, new_node)
                    ]

                G = gh.add_node_after(G, node_name,
                                      *[n for t, n in all_candidates])

    ### 3) Include composition node on top
    # Iterating in reverse puts the first composition step at the very top
    for step_name, model_name, _ in reversed(all_composition_steps):
        starting_nodes = gh.get_starting_nodes(G)
        for n in starting_nodes:
            G.add_edge((step_name, model_name), n)

    ### 4) Verify the Graph structure

    # all steps are in the graph
    for (step_name, model_name), _ in steps.items():
        if (step_name, model_name) not in G:
            raise ValueError("'(%s , %s)' should be in graph" %
                             (step_name, model_name))
    # all nodes were in the steps
    for node in G.nodes():
        if node not in steps and node not in new_steps:
            raise ValueError("'(%s,%s)' shouldn't be in graph" % node)

    assert_model_graph_structure(G)

    return G, new_steps
예제 #7
0
 def create_graph(self):
     """Build the graph from ``self._edges``, verify it and cache what is derived from it.

     Sets ``self.complete_graph``, then, once ``_verif_graph_structure`` has
     accepted it, ``self._terminal_node`` (first terminal node) and
     ``self._nodes_order`` (nodes in the order yielded by ``iter_graph``).
     """
     self.complete_graph = graph_from_edges(*self._edges)
     self._verif_graph_structure()

     graph = self.complete_graph
     self._terminal_node = get_terminal_nodes(graph)[0]
     self._nodes_order = list(iter_graph(graph))
예제 #8
0
def test_RandomModelGenerator_iterator(type_of_iterator, num_only):
    """Check every model produced by one of the RandomModelGenerator iterators.

    Parameters
    ----------
    type_of_iterator : str
        one of "default", "block_search" or "block_search_random".
    num_only :
        forwarded to ``get_automl_config``.

    For each generated model the test checks the shape of the triplet
    ``(Graph, all_models_params, block_to_use)``, converts the graph to json
    code and builds the corresponding sklearn model. For the default iterator,
    a RandomForestClassifier model is additionally fitted on a small sample.
    """

    dfX, y, auto_ml_config = get_automl_config(num_only)

    random_model_generator = RandomModelGenerator(
        auto_ml_config=auto_ml_config, random_state=123)

    if type_of_iterator == "default":
        iterator = random_model_generator.iterator_default_models()

    elif type_of_iterator == "block_search":
        iterator = random_model_generator.iterate_block_search(
            random_order=False)

    elif type_of_iterator == "block_search_random":
        iterator = random_model_generator.iterate_block_search(
            random_order=True)

    else:
        # Fail explicitly rather than with a NameError on 'iterator' below
        raise ValueError("Unknown type_of_iterator : %r" % (type_of_iterator, ))

    assert hasattr(iterator, "__iter__")

    # Node of the model that is actually fitted (default iterator only)
    rf_node = ('Model', ('Model', 'RandomForestClassifier'))

    # verif iterator
    for model in iterator:

        assert isinstance(model, tuple)
        assert len(model) == 3
        Graph, all_models_params, block_to_use = model

        terminal_nodes = get_terminal_nodes(Graph)
        assert len(terminal_nodes) == 1
        assert terminal_nodes[0][0] == StepCategories.Model

        assert hasattr(Graph, "edges")
        assert hasattr(Graph, "nodes")

        assert isinstance(all_models_params, dict)
        for node in Graph.nodes:
            assert node in all_models_params

        assert isinstance(block_to_use, (tuple, list))
        for b in block_to_use:
            assert b in TypeOfVariables.alls

        result = convert_graph_to_code(Graph,
                                       all_models_params,
                                       also_returns_mapping=True)
        assert isinstance(result, dict)
        assert "name_mapping" in result
        assert "json_code" in result

        sk_model = sklearn_model_from_param(result["json_code"])
        assert hasattr(sk_model, "fit")

        if type_of_iterator == "default" and rf_node in Graph.nodes:
            # in that case I'll actually do the fitting here
            # I'll simplify the model to have 2 estimators (faster)

            all_models_params[rf_node]["n_estimators"] = 2
            result = convert_graph_to_code(Graph,
                                           all_models_params,
                                           also_returns_mapping=True)
            sk_model = sklearn_model_from_param(result["json_code"])

            # First 10 observations of each class
            sub_index = np.concatenate(
                (np.where(y == 0)[0][0:10], np.where(y == 1)[0][0:10]), axis=0)
            # Needs at least 20 observations to make sure all transformers works
            sk_model.fit(dfX.iloc[sub_index, :], y[sub_index])

            yhat = sk_model.predict(dfX.head(2))
            assert yhat.shape == (2, )