def test_unnest_tuple():
    """Check that unnest_tuple turns each nested input into the expected flat tuple."""
    # Each case is (input, expected output of unnest_tuple).
    cases = (
        ((1, 2, 3), (1, 2, 3)),
        ((1, (2, 3)), (1, 2, 3)),
        # `(3)` is just the int 3, so this input equals the previous one.
        ((1, (2, (3))), (1, 2, 3)),
        (((1, ), (2, ), (3, 4)), (1, 2, 3, 4)),
    )

    for source, expected in cases:
        flattened = unnest_tuple(source)
        assert flattened == expected
def add_columns_selector(Graph, var_type_node_dico, var_type_columns_dico, all_models_params):
    """Attach column selection to the entry nodes of the model graph.

    Only the starting nodes of the sub-graph made of non-composition steps
    are considered. For each of them whose variable type is not None:

    * if ``_must_include_selector`` is true for the node's model
      (``node[1]``), a new 'Selector_...' node is inserted above it in the
      graph and its parameters are registered in ``all_models_params``;
    * otherwise a 'columns_to_use' entry is written into the parameters
      already stored for that node.

    Nodes whose variable type is None are left untouched.

    Parameters
    ----------
    Graph : nx.DiGraph
        Graph representation of the model.

    var_type_node_dico : dict
        Which node works on which type of columns.
        keys = nodes of Graph (ie : name of models)
        values = variable type (or None)

    var_type_columns_dico : dict
        Which columns correspond to which type of variable.
        keys = variable type
        values = list of columns

    all_models_params : dict
        Parameters of each model.
        keys = nodes of Graph (ie : name of models)
        values = parameters of each model
        WILL BE MODIFIED IN PLACE to include 'columns_to_use'.

    Returns
    -------
    tuple
        (modified Graph, modified all_models_params)

    Raises
    ------
    ValueError
        If the selector node to insert is already a node of the graph.
    """
    # Composition steps are ignored when looking for the entry points.
    plain_nodes = []
    for candidate in Graph.nodes:
        if StepCategories.is_composition_step(candidate[0]):
            continue
        plain_nodes.append(candidate)

    entry_nodes = gh.get_starting_nodes(Graph.subgraph(plain_nodes))

    for node in entry_nodes:
        vtype = var_type_node_dico[node]
        needs_selector = _must_include_selector(node[1])

        if vtype is None:
            # No variable type : the step can be applied to everything,
            # so there is nothing to add, selector or parameter.
            continue

        if not needs_selector:
            # The model is handed its columns through its own parameters.
            all_models_params[node]["columns_to_use"] = _get_columns(vtype, var_type_columns_dico)
            continue

        # NOTE(review): if unnest_tuple returns a tuple with more than one
        # element, this %-formatting raises TypeError -- confirm against
        # unnest_tuple.
        selector_name = "Selector_%s" % unnest_tuple(vtype)
        selector_node = (selector_name, (selector_name, SpecialModels.ColumnsSelector))
        if selector_node in Graph.nodes:
            raise ValueError("Please check, I have duplicate names : %s" % str(selector_node))

        Graph = gh.insert_node_above(Graph, node, new_node=selector_node)
        all_models_params[selector_node] = {
            "columns_to_use": _get_columns(vtype, var_type_columns_dico)
        }

    return Graph, all_models_params
def create_graphical_representation(steps):
    """Build the graph representation of a model from its steps.

    Composition steps (as told by ``StepCategories.is_composition_step``)
    are set aside first. The remaining steps are added one by one to a
    directed graph, each one being plugged after the current terminal
    node(s) according to its variable type. Composition steps are then
    connected, in reverse order, to the starting nodes of the graph, and
    the result is checked.

    Parameters
    ----------
    steps : OrderedDict
        keys = (step_name, model_name) 2-tuples, used as graph nodes
        values = variable type of the step

    Returns
    -------
    G : nx.DiGraph
        The graph that was built.
    new_steps : OrderedDict
        The 'Selector_...' nodes that were created on the fly.
        keys = created node
        values = variable types that node was created for

    Raises
    ------
    ValueError
        If a non-composition step is already a node of the graph when it
        is about to be added, if a step of ``steps`` is missing from the
        final graph, or if the final graph contains a node that is neither
        in ``steps`` nor in ``new_steps``.

    Notes
    -----
    ``assert_model_graph_structure(G)`` is called just before returning;
    it is defined outside this block and presumably raises on an invalid
    structure -- TODO confirm.
    """

    # Remark : in principle the step number should be stored in the graph,
    # together with correct labels,
    # so that several nodes can share the same name (Ex : Scaler...)

    ### 1) Split Composition Steps vs Rest
    all_composition_steps = []
    all_others = []
    for (step_name, model_name), var_type in steps.items():
        if StepCategories.is_composition_step(step_name):
            all_composition_steps.append((step_name, model_name, var_type))
        else:
            all_others.append((step_name, model_name, var_type))

    ### 2) Create Graph for non-composition step
    new_steps = OrderedDict()

    G = nx.DiGraph()
    for step_name, model_name, var_type in all_others:

        unested_var_type = unnest_tuple(var_type)

        terminal_nodes = gh.get_terminal_nodes(
            G
        )  # Terminal links : I'll add the new step on one (or more) of those

        # Map the un-nested variable type of each terminal node to that node
        ending_node_type = {
            unnest_tuple(steps[node]): node
            for node in terminal_nodes
        }

        node_name = (step_name, model_name)  # 2-tuple
        if node_name in G.nodes:
            raise ValueError("This node already exists '(%s,%s)'" % node_name)

        # 1) Either the new node is attached to ONE terminal node
        # 2) Or a new branch is created (new node attached to nothing)
        # 3) Or the new node is attached to SEVERAL terminal nodes

        elif unested_var_type in ending_node_type:
            ### 1) I already have a branch of this type
            last_node = ending_node_type[unested_var_type]
            G = gh.add_node_after(G, node_name, last_node)

        ### I don't have a branch ###
        else:
            # Keep the terminal nodes selected by tuple_include
            all_candidates = [(t, n) for t, n in ending_node_type.items()
                              if tuple_include(t, unested_var_type)]
            # I need to look where I want to plug it #
            if len(all_candidates) == 0:
                ### 2) I must create a new branch : no candidate node ###
                G = gh.add_node_after(G, node_name)
            else:
                ### 3) I attach the new node to several nodes
                ### Here : a node sometimes has to be added UPSTREAM, when some types have not been added yet
                types_added = unnest_tuple([t for t, n in all_candidates])
                types_not_added = diff(unested_var_type, types_added)
                if len(types_not_added) > 0:

                    # NOTE(review): if unnest_tuple returns a tuple with more
                    # than one element, this %-formatting raises TypeError --
                    # confirm against unnest_tuple.
                    name_of_cat = "Selector_%s" % unnest_tuple(types_not_added)
                    new_node = (name_of_cat, (name_of_cat,
                                              SpecialModels.ColumnsSelector))
                    G = gh.add_node_after(G, new_node)

                    new_steps[
                        new_node] = types_not_added  # I also must dynamically add the node to the list of steps

                    all_candidates = all_candidates + [
                        (types_not_added, new_node)
                    ]

                # The new node is plugged after every candidate node
                G = gh.add_node_after(G, node_name,
                                      *[n for t, n in all_candidates])

    ### 3) Include composition node on top
    # Starting nodes are recomputed for each composition step, in reverse order
    for step_name, model_name, _ in reversed(all_composition_steps):
        starting_nodes = gh.get_starting_nodes(G)
        for n in starting_nodes:
            G.add_edge((step_name, model_name), n)

    ### 4) Verify the Graph structure

    # all steps are in the graph
    for (step_name, model_name), _ in steps.items():
        if (step_name, model_name) not in G:
            raise ValueError("'(%s , %s)' should be in graph" %
                             (step_name, model_name))
    # all nodes were in the steps (or were created above as selectors)
    for node in G.nodes():
        if node not in steps and node not in new_steps:
            # The message formatting relies on node being a 2-tuple
            raise ValueError("'(%s,%s)' shouldn't be in graph" % node)

    assert_model_graph_structure(G)

    return G, new_steps