def dask_data_node(g, node, node_name, data, q_desc_ids):
    """Store the delayed computation for a data node in ``node["dask"]``.

    A node without incoming edges selects from ``data`` at the position of
    ``node["idx"]`` inside ``q_desc_ids``. A node with incoming edges selects
    from its parent's output, using the relative index and parent function
    returned by ``_get_parents_of_data_node``.

    The result is written to ``node["dask"]``; the function returns ``None``.
    """
    has_parents = len(g.in_edges(node_name)) > 0

    if has_parents:
        # Select the relevant output of the parent.
        rel_idx, parent_fn = _get_parents_of_data_node(g, node, node_name)
        selector = select_numeric(rel_idx)
        source = parent_fn
    else:
        # Root data node: read straight from the query data.
        attr_idx = node["idx"]
        selector = select_numeric(q_desc_ids.index(attr_idx))
        source = data

    node["dask"] = delayed(selector)(source)
    return None
def _numeric_inputs(g, parents): # Returns the 'numeric' inputs of the parent nodes collector = [] for rel_idx, n, model_type in parents: X = compute(g, n) selected = select_numeric(X, rel_idx) collector.append(selected) return collector
def dask_merge_node(g, node, node_name):
    """Store in ``node["dask"]`` a delayed mean over the parents' outputs.

    For every source of an edge into ``node_name``, the parent's ``"dask"``
    entry is wrapped in an extra delayed ``select_numeric`` step, indexed by
    the position of ``node["idx"]`` in that parent's ``"tgt"`` list. The
    wrapped parents are then combined with ``np.mean(..., axis=0)``.

    The result is written to ``node["dask"]``; the function returns ``None``.
    """
    # Parent nodes
    sources = [src for src, _ in g.in_edges(node_name)]
    parent_tasks = [g.nodes[src]["dask"] for src in sources]
    parent_targets = [g.nodes[src]["tgt"] for src in sources]

    # Incorporate the extra selection step for every parent.
    selected = [
        delayed(select_numeric(targets.index(node["idx"])))(task)
        for task, targets in zip(parent_tasks, parent_targets)
    ]

    node["dask"] = delayed(partial(np.mean, axis=0))(selected)
    return None
def base_inference_algorithm(g):
    """Convert the graph ``g`` into a mapping of node name -> function.

    Nodes are visited in topological order, so a node's parents already have
    an entry in the mapping when the node itself is handled. What is built
    depends on the node's ``"kind"`` attribute:

    * ``"data"``: a ``select_numeric`` selector, combined with the first
      parent's function when the node has ancestors.
    * ``"imputation"``: the node's own ``"function"``.
    * ``"model"``: the node's ``"function"`` applied to its parents' outputs,
      ordered by the first entry of each parent's ``"tgt"``.
    * ``"prob"``: the sum (axis 0) of the parents' selected nominal outputs,
      padded with ``pad_proba`` when an edge carries fewer classes than the
      node.
    * ``"vote"``: the node's ``"function"`` applied to its first parent.
    * ``"merge"``: the mean (axis 1) of the parents' selected numeric outputs.

    Nodes with any other kind get no entry. Functions are combined through
    the ``o`` and ``x`` helpers, which are defined elsewhere.
    NOTE(review): ``o`` looks like function composition and ``x`` like a
    fan-out over several functions -- confirm against their definitions.

    Returns the ``functions`` dict.
    """

    def _sources(name):
        # Source node of every edge that points into `name`.
        return [edge[0] for edge in g.in_edges(name)]

    # Convert the graph to its functions
    sorted_nodes = list(nx.topological_sort(g))

    msg = """ sorted_nodes: {} """.format(sorted_nodes)
    debug_print(msg, level=1, V=VERBOSITY)

    functions = {}
    q_desc_ids = list(get_ids(g, kind="desc"))

    for node_name in sorted_nodes:
        node = g.nodes(data=True)[node_name]
        kind = node.get("kind", None)

        if kind == "data":
            if not nx.ancestors(g, node_name):
                # Root data node: index into the descriptive attributes.
                functions[node_name] = select_numeric(
                    q_desc_ids.index(node["idx"])
                )
            else:
                # Select the relevant output
                parent = _sources(node_name)[0]
                parent_targets = g.nodes[parent]["tgt"]
                position = parent_targets.index(node["idx"])
                functions[node_name] = o(
                    select_numeric(position), functions[parent]
                )

        elif kind == "imputation":
            functions[node_name] = node["function"]

        elif kind == "model":
            by_target = {
                g.nodes[src]["tgt"][0]: functions[src]
                for src in _sources(node_name)
            }
            # We need to sort to get the inputs in the correct order.
            ordered = [by_target[key] for key in sorted(by_target)]
            stacked = o(np.transpose, x(*ordered, return_type=np.array))
            model_fn = node["function"]
            functions[node_name] = o(model_fn, stacked)

        elif kind == "prob":
            # Select the relevant output
            target_attr = node["idx"]
            target_classes = node["classes"]

            sources = _sources(node_name)
            edge_classes = [
                g.edges[edge]["classes"] for edge in g.in_edges(node_name)
            ]
            source_targets = [g.nodes[src]["tgt"] for src in sources]
            triples = [
                (functions[src], targets, classes)
                for src, targets, classes in zip(
                    sources, source_targets, edge_classes
                )
            ]

            contributions = []
            for parent_fn, targets, classes in triples:
                contribution = o(
                    select_nominal(targets.index(target_attr)), parent_fn
                )
                if len(classes) < len(target_classes):
                    # Parent has fewer classes than this node: pad it.
                    contribution = o(
                        pad_proba(classes, target_classes), contribution
                    )
                contributions.append(contribution)

            summed = partial(np.sum, axis=0)
            functions[node_name] = o(
                summed, x(*contributions, return_type=np.array)
            )

        elif kind == "vote":
            # Convert probabilistic votes to single prediction
            parent = _sources(node_name)[0]
            functions[node_name] = o(node["function"], functions[parent])

        elif kind == "merge":
            target_attr = node["idx"]
            sources = _sources(node_name)
            source_targets = [g.nodes[src]["tgt"] for src in sources]
            pairs = [
                (functions[src], targets)
                for src, targets in zip(sources, source_targets)
            ]
            selected = [
                o(select_numeric(targets.index(target_attr)), parent_fn)
                for parent_fn, targets in pairs
            ]
            stacked = o(np.transpose, x(*selected, return_type=np.array))
            averaged = partial(np.mean, axis=1)
            functions[node_name] = o(averaged, stacked)

    return functions
def f(rel_idx):
    """Return ``select_numeric(g.data, rel_idx)``.

    ``g`` is not a parameter; it is resolved from the enclosing scope.
    """
    return select_numeric(g.data, rel_idx)