import logging
from copy import deepcopy
from typing import Dict, Iterable, List, Tuple

import numpy as np
from sklearn.utils import check_array

# NOTE: the intra-package imports below are assumed from the surrounding module;
# the paths follow the CausalNex codebase layout and may need adjusting.
from causalnex.structure.pytorch.core import NotearsMLP
from causalnex.structure.pytorch.dist_type import DistTypeContinuous, dist_type_aliases
from causalnex.structure.structuremodel import StructureModel


def from_numpy(
    X: np.ndarray,
    dist_type_schema: Dict[int, str] = None,
    lasso_beta: float = 0.0,
    ridge_beta: float = 0.0,
    use_bias: bool = False,
    hidden_layer_units: Iterable[int] = None,
    w_threshold: float = None,
    max_iter: int = 100,
    tabu_edges: List[Tuple[int, int]] = None,
    tabu_parent_nodes: List[int] = None,
    tabu_child_nodes: List[int] = None,
    **kwargs
) -> StructureModel:
    """
    Learn the `StructureModel`, the graph structure with lasso regularisation
    describing conditional dependencies between variables in data presented as a numpy array.

    Based on DAGs with NO TEARS.
    @inproceedings{zheng2018dags,
        author = {Zheng, Xun and Aragam, Bryon and Ravikumar, Pradeep and Xing, Eric P.},
        booktitle = {Advances in Neural Information Processing Systems},
        title = {{DAGs with NO TEARS: Continuous Optimization for Structure Learning}},
        year = {2018},
        codebase = {https://github.com/xunzheng/notears}
    }

    Args:
        X: 2d input data, axis=0 is data rows, axis=1 is data columns. Data must be row-oriented.

        dist_type_schema: The dist type schema corresponding to the passed in data X.
            It maps the positional column in X to the string alias of a dist type.
            A list of alias names can be found in ``dist_type/__init__.py``.
            If None, assumes that all data in X is continuous.

        lasso_beta: Constant that multiplies the lasso term (l1 regularisation).
            NOTE: when using nonlinearities, the l1 loss only applies to the dag_layer.

        use_bias: Whether to fit a bias parameter in the NOTEARS algorithm.

        ridge_beta: Constant that multiplies the ridge term (l2 regularisation).
            When using nonlinear layers, use of this parameter is recommended.

        hidden_layer_units: An iterable whose length determines the number of hidden layers
            and whose values give the number of nodes in each layer, in order.

        w_threshold: fixed threshold below which absolute edge weights are removed.

        max_iter: max number of dual ascent steps during optimisation.

        tabu_edges: list of edges (from, to) not to be included in the graph.

        tabu_parent_nodes: list of nodes banned from being a parent of any other nodes.

        tabu_child_nodes: list of nodes banned from being a child of any other nodes.

        **kwargs: additional arguments for the NOTEARS MLP model.

    Returns:
        StructureModel: a graph of conditional dependencies between data variables.

    Raises:
        ValueError: If X does not contain data.
        ValueError: If schema does not correspond to columns.
    """
    # n examples, d properties
    if not X.size:
        raise ValueError("Input data X is empty, cannot learn any structure")
    logging.info("Learning structure using 'NOTEARS' optimisation.")

    # Check array for NaN or inf values
    check_array(X)

    if dist_type_schema is not None:
        # make sure that there is one provided key per column
        if set(range(X.shape[1])).symmetric_difference(set(dist_type_schema.keys())):
            raise ValueError(
                "Difference between schema indices and expected data column indices. "
                "Got {} schema".format(dist_type_schema)
            )

    # if dist_type_schema is None, assume all columns are continuous,
    # else init the alias mapped object
    dist_types = (
        [DistTypeContinuous(idx=idx) for idx in np.arange(X.shape[1])]
        if dist_type_schema is None
        else [
            dist_type_aliases[alias](idx=idx)
            for idx, alias in dist_type_schema.items()
        ]
    )

    # shape of X before preprocessing
    _, d_orig = X.shape
    # perform dist type pre-processing (i.e. column expansion)
    for dist_type in dist_types:
        # NOTE: preprocess_X must be called first to perform possible column expansions
        X = dist_type.preprocess_X(X)
        tabu_edges = dist_type.preprocess_tabu_edges(tabu_edges)
        tabu_parent_nodes = dist_type.preprocess_tabu_nodes(tabu_parent_nodes)
        tabu_child_nodes = dist_type.preprocess_tabu_nodes(tabu_child_nodes)
    # shape of X after preprocessing
    _, d = X.shape

    # if None or empty, convert into a list with single item
    if hidden_layer_units is None:
        hidden_layer_units = [0]
    elif isinstance(hidden_layer_units, list) and not hidden_layer_units:
        hidden_layer_units = [0]

    # if no hidden layer units, still take 1 iteration step with bounds
    hidden_layer_bnds = hidden_layer_units[0] if hidden_layer_units[0] else 1

    # Flip i and j because Pytorch flattens the vector in another direction
    bnds = [
        (0, 0)
        if i == j
        else (0, 0)
        if tabu_edges is not None and (i, j) in tabu_edges
        else (0, 0)
        if tabu_parent_nodes is not None and i in tabu_parent_nodes
        else (0, 0)
        if tabu_child_nodes is not None and j in tabu_child_nodes
        else (None, None)
        for j in range(d)
        for _ in range(hidden_layer_bnds)
        for i in range(d)
    ]

    model = NotearsMLP(
        n_features=d,
        dist_types=dist_types,
        hidden_layer_units=hidden_layer_units,
        lasso_beta=lasso_beta,
        ridge_beta=ridge_beta,
        bounds=bnds,
        use_bias=use_bias,
        **kwargs
    )

    model.fit(X, max_iter=max_iter)
    sm = StructureModel(model.adj)

    if w_threshold:
        sm.remove_edges_below_threshold(w_threshold)

    # extract the mean effect and add as edge attribute
    mean_effect = model.adj_mean_effect
    for u, v, edge_dict in sm.edges.data(True):
        sm.add_edge(
            u,
            v,
            origin="learned",
            weight=edge_dict["weight"],
            mean_effect=mean_effect[u, v],
        )

    # set bias as node attribute
    bias = model.bias
    for node in sm.nodes():
        value = None
        if bias is not None:
            value = bias[node]
        sm.nodes[node]["bias"] = value

    # attach each dist_type object to corresponding node(s)
    for dist_type in dist_types:
        sm = dist_type.add_to_node(sm)

    # preserve the structure_learner as a graph attribute
    sm.graph["structure_learner"] = model

    # collapse the adj down and store as graph attr
    adj = deepcopy(model.adj)
    for dist_type in dist_types:
        adj = dist_type.collapse_adj(adj)
    sm.graph["graph_collapsed"] = StructureModel(adj[:d_orig, :d_orig])

    return sm
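# A minimal usage sketch for the schema-aware ``from_numpy`` above, assuming the
# module-level imports shown at the top of this file. The "cont" and "bin"
# dist-type aliases are assumptions based on the registry referenced in the
# docstring (``dist_type/__init__.py``); check that file for the exact names.
#
#     import numpy as np
#
#     rng = np.random.default_rng(0)
#     x0 = rng.normal(size=500)
#     x1 = rng.normal(size=500)
#     x2 = ((x0 + x1 + rng.normal(size=500)) > 0).astype(float)  # binary column
#     X = np.column_stack([x0, x1, x2])
#
#     sm = from_numpy(
#         X,
#         dist_type_schema={0: "cont", 1: "cont", 2: "bin"},
#         hidden_layer_units=[8],
#         ridge_beta=0.01,
#         w_threshold=0.1,
#     )
#     # the collapsed graph maps back to the original (pre-expansion) columns
#     print(sm.graph["graph_collapsed"].edges)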
def from_numpy(
    X: np.ndarray,
    lasso_beta: float = 0.0,
    ridge_beta: float = 0.0,
    use_bias: bool = False,
    hidden_layer_units: Iterable[int] = None,
    w_threshold: float = None,
    max_iter: int = 100,
    tabu_edges: List[Tuple[int, int]] = None,
    tabu_parent_nodes: List[int] = None,
    tabu_child_nodes: List[int] = None,
    **kwargs
) -> StructureModel:
    """
    Learn the `StructureModel`, the graph structure with lasso regularisation
    describing conditional dependencies between variables in data presented as a numpy array.

    Based on DAGs with NO TEARS.
    @inproceedings{zheng2018dags,
        author = {Zheng, Xun and Aragam, Bryon and Ravikumar, Pradeep and Xing, Eric P.},
        booktitle = {Advances in Neural Information Processing Systems},
        title = {{DAGs with NO TEARS: Continuous Optimization for Structure Learning}},
        year = {2018},
        codebase = {https://github.com/xunzheng/notears}
    }

    Args:
        X: 2d input data, axis=0 is data rows, axis=1 is data columns. Data must be row-oriented.

        lasso_beta: Constant that multiplies the lasso term (l1 regularisation).
            NOTE: when using nonlinearities, the l1 loss only applies to the dag_layer.

        use_bias: Whether to fit a bias parameter in the NOTEARS algorithm.

        ridge_beta: Constant that multiplies the ridge term (l2 regularisation).
            When using nonlinear layers, use of this parameter is recommended.

        hidden_layer_units: An iterable whose length determines the number of hidden layers
            and whose values give the number of nodes in each layer, in order.

        w_threshold: fixed threshold below which absolute edge weights are removed.

        max_iter: max number of dual ascent steps during optimisation.

        tabu_edges: list of edges (from, to) not to be included in the graph.

        tabu_parent_nodes: list of nodes banned from being a parent of any other nodes.

        tabu_child_nodes: list of nodes banned from being a child of any other nodes.

        **kwargs: additional arguments for the NOTEARS MLP model.

    Returns:
        StructureModel: a graph of conditional dependencies between data variables.

    Raises:
        ValueError: If X does not contain data.
""" # n examples, d properties if not X.size: raise ValueError("Input data X is empty, cannot learn any structure") logging.info("Learning structure using 'NOTEARS' optimisation.") # Check array for NaN or inf values check_array(X) _, d = X.shape # if None or empty, convert into a list with single item if hidden_layer_units is None: hidden_layer_units = [0] elif isinstance(hidden_layer_units, list) and not hidden_layer_units: hidden_layer_units = [0] # if no hidden layer units, still take 1 iteration step with bounds hidden_layer_bnds = hidden_layer_units[0] if hidden_layer_units[0] else 1 # Flip i and j because Pytorch flattens the vector in another direction bnds = [ (0, 0) if i == j else (0, 0) if tabu_edges is not None and (i, j) in tabu_edges else (0, 0) if tabu_parent_nodes is not None and i in tabu_parent_nodes else (0, 0) if tabu_child_nodes is not None and j in tabu_child_nodes else (None, None) for j in range(d) for _ in range(hidden_layer_bnds) for i in range(d) ] model = NotearsMLP( n_features=d, hidden_layer_units=hidden_layer_units, lasso_beta=lasso_beta, ridge_beta=ridge_beta, bounds=bnds, use_bias=use_bias, **kwargs ) model.fit(X, max_iter=max_iter) sm = StructureModel(model.adj) if w_threshold: sm.remove_edges_below_threshold(w_threshold) mean_effect = model.adj_mean_effect # extract the mean effect and add as edge attribute for u, v, edge_dict in sm.edges.data(True): sm.add_edge( u, v, origin="learned", weight=edge_dict["weight"], mean_effect=mean_effect[u, v], ) # set bias as node attribute bias = model.bias for node in sm.nodes(): value = None if bias is not None: value = bias[node] sm.nodes[node]["bias"] = value # preserve the structure_learner as a graph attribute sm.graph["structure_learner"] = model return sm