def naive_factorization(data=None, node_id=0, context=None, scope=None, **kwargs):
    """Factorize ``scope`` completely: one leaf per random variable.

    A ``Product`` node is created with one placeholder child (``None``) per
    entry of ``scope``; each placeholder is paired with a
    ``CREATE_LEAF_NODE`` operation that tells the caller what to build there.

    Args:
        data: Dataset handed to ``get_YX`` together with
            ``context.feature_size``.
        node_id: Id assigned to the new product node.
        context: Object exposing a ``feature_size`` attribute.
        scope: Random variables to factorize. Must not be ``None``.
        **kwargs: Accepted for signature compatibility and ignored.

    Returns:
        ``(product_node, operations)`` where ``operations`` is a list of
        ``(SplittingOperations.CREATE_LEAF_NODE, params)`` tuples, one per
        entry of ``scope``.

    Raises:
        AssertionError: If ``scope`` is ``None``.
    """
    assert scope is not None, "No scope"

    product = Product()
    product.scope = scope
    product.id = node_id

    # NOTE(review): presumably ``targets`` holds one column per scope entry
    # and ``features`` the shared conditioning columns -- confirm in get_YX.
    targets, features = get_YX(data, context.feature_size)

    operations = []
    for column, variable in enumerate(scope):
        # Reserve the child slot first; the leaf is attached there later.
        position = len(product.children)
        product.children.append(None)

        leaf_data = concatenate_yx(targets[:, column].reshape(-1, 1), features)
        leaf_params = {
            "data": leaf_data,
            "parent_id": product.id,
            "pos": position,
            "scope": [variable],
        }
        operations.append((SplittingOperations.CREATE_LEAF_NODE, leaf_params))

    return product, operations
def _deserialize_product(self, node, node_map):
    """Rebuild a ``Product`` node from its serialized representation.

    Args:
        node: Serialized node. ``node.product.children`` is iterated as the
            child ids and ``node.id`` becomes the id of the result.
        node_map: Mapping from node id to an already deserialized node;
            queried with ``.get``.

    Returns:
        A ``Product`` whose children are the nodes looked up in ``node_map``
        and whose ``id`` equals ``node.id``.

    Raises:
        AssertionError: If any child id is missing from ``node_map``.
    """
    # Resolve references to child nodes by ID.
    resolved = [node_map.get(child_id) for child_id in node.product.children]
    # Check that all children have been resolved.
    assert None not in resolved, "Child node ID could not be resolved"

    rebuilt = Product(children=resolved)
    rebuilt.id = node.id
    return rebuilt
def remove_non_informative_features(data=None, node_id=0, scope=None, context=0, uninformative_features_idx=None, **kwargs):
    """Split flagged variables off as leaves under a new product node.

    Every scope position flagged in ``uninformative_features_idx`` gets its
    own placeholder child and a ``CREATE_LEAF_NODE`` operation. The
    unflagged positions, if any, are bundled into one further child with a
    ``GET_NEXT_OP`` operation.

    Args:
        data: Dataset handed to ``get_YX`` together with
            ``context.feature_size``.
        node_id: Id assigned to the new product node.
        scope: Random variables; ``scope[i]`` corresponds to flag ``i``.
        context: Object exposing ``feature_size``. The default ``0`` is kept
            for signature compatibility but is not usable, because
            ``context.feature_size`` is read unconditionally.
        uninformative_features_idx: Iterable of flags, one per scope
            position; a truthy flag marks the variable to split off.
            Must not be ``None``.
        **kwargs: Accepted for signature compatibility and ignored.

    Returns:
        ``(product_node, operations)`` where ``operations`` is a list of
        ``(SplittingOperations.<op>, params)`` tuples.

    Raises:
        AssertionError: If ``uninformative_features_idx`` is ``None`` or no
            position is flagged.
    """
    assert uninformative_features_idx is not None, "parameter uninformative_features_idx can't be None"

    prod_node = Product()
    prod_node.scope = scope
    prod_node.id = node_id
    y, x = get_YX(data, context.feature_size)

    remaining_rvs = []
    remaining_idx = []
    result = []
    for idx, flagged in enumerate(uninformative_features_idx):
        rv = scope[idx]
        if not flagged:
            remaining_rvs.append(rv)
            remaining_idx.append(idx)
            continue

        # Reserve the child slot; the leaf is attached there later.
        prod_node.children.append(None)
        data_slice = concatenate_yx(y[:, idx].reshape(-1, 1), x)
        result.append((
            SplittingOperations.CREATE_LEAF_NODE,
            {
                "data": data_slice,
                "parent_id": prod_node.id,
                "pos": len(prod_node.children) - 1,
                "scope": [rv],
            },
        ))

    # The caller is expected to invoke this only when something is flagged.
    assert len(result) > 0

    if remaining_idx:
        prod_node.children.append(None)
        result.append((
            SplittingOperations.GET_NEXT_OP,
            {
                # Index ``y`` rather than ``data``: ``remaining_idx`` holds
                # positions within the scope, exactly like the ``y[:, idx]``
                # leaf slices above. Indexing ``data`` would only agree when
                # the targets occupy the leading columns of ``data``.
                "data": concatenate_yx(y[:, remaining_idx], x),
                "parent_id": prod_node.id,
                "pos": len(prod_node.children) - 1,
                "scope": remaining_rvs,
            },
        ))

    return prod_node, result
def create_product(data=None, node_id=0, parent_id=0, pos=0, context=None, scope=None, split_cols=None, **kwargs):
    """Split the columns of ``data`` and create a product node over the parts.

    ``split_cols(data, context, scope)`` is called to partition the scope.
    When it yields a single part, no node is created; instead one
    ``GET_NEXT_OP`` operation flagged with ``"no_independencies": True`` is
    returned for the original ``parent_id``/``pos``. Otherwise a ``Product``
    node is built with one placeholder child (``None``) per part and a
    ``GET_NEXT_OP`` operation for each.

    Args:
        data: Dataset passed to ``split_cols`` and forwarded in the
            single-part case.
        node_id: Id assigned to the new product node.
        parent_id: Parent id forwarded in the single-part case.
        pos: Child position forwarded in the single-part case.
        context: Passed through to ``split_cols``.
        scope: Random variables covered by ``data``. Must not be ``None``.
        split_cols: Callable returning a sized iterable of 3-tuples
            ``(data_slice, scope_slice, _)``. Must not be ``None``.
        **kwargs: Accepted for signature compatibility and ignored.

    Returns:
        ``(node, operations)``; ``node`` is ``None`` in the single-part case.

    Raises:
        AssertionError: If ``split_cols`` or ``scope`` is ``None``, or a
            returned scope slice is not a ``list``.
    """
    assert split_cols is not None, "No split_cols lambda"
    assert scope is not None, "No scope"

    partitions = split_cols(data, context, scope)

    # Nothing could be separated: hand the untouched data back to the caller.
    if len(partitions) == 1:
        retry_params = {
            "data": data,
            "parent_id": parent_id,
            "pos": pos,
            "no_independencies": True,
            "scope": scope,
        }
        return None, [(SplittingOperations.GET_NEXT_OP, retry_params)]

    product = Product()
    product.scope.extend(scope)
    product.id = node_id

    operations = []
    for part_data, part_scope, _ in partitions:
        assert isinstance(part_scope, list), "slice must be a list"

        # Reserve the child slot; the sub-structure is attached there later.
        position = len(product.children)
        product.children.append(None)

        part_params = {
            "data": part_data,
            "parent_id": node_id,
            "pos": position,
            "scope": part_scope,
        }
        operations.append((SplittingOperations.GET_NEXT_OP, part_params))

    return product, operations
def remove_non_informative_features(data=None, node_id=0, scope=None, **kwargs):
    """Split zero-variance variables off as leaves under a new product node.

    The variance of every column ``data[:, scope]`` is computed. Each
    variable whose variance is exactly ``0`` gets its own placeholder child
    (``None``) and a ``CREATE_LEAF_NODE`` operation. The variables with
    non-zero variance, if any, are bundled into one further child with a
    ``GET_NEXT_OP`` operation.

    Args:
        data: 2-D array indexed as ``data[:, scope]``; the entries of
            ``scope`` are used as column indices.
        node_id: Id assigned to the new product node and used as
            ``parent_id`` of every emitted operation.
        scope: Sequence of column indices / random variables to inspect.
        **kwargs: Accepted for signature compatibility and ignored.

    Returns:
        ``(product_node, operations)`` where ``operations`` is a list of
        ``(SplittingOperations.<op>, params)`` tuples.

    Raises:
        AssertionError: If no variable in ``scope`` has zero variance.
    """
    prod_node = Product()
    prod_node.scope = scope
    prod_node.id = node_id

    is_constant = np.var(data[:, scope], 0) == 0

    # Collect the surviving variables while scanning instead of deleting from
    # a copy of ``scope`` by original index: positions shift after the first
    # deletion, which removed the wrong variable or raised IndexError.
    informative_rvs = []
    result = []
    for rv, constant in zip(scope, is_constant):
        if not constant:
            informative_rvs.append(rv)
            continue

        # Reserve the child slot; the leaf is attached there later.
        prod_node.children.append(None)
        result.append((
            SplittingOperations.CREATE_LEAF_NODE,
            {
                "data": data[:, rv].reshape(-1, 1),
                "parent_id": node_id,
                "pos": len(prod_node.children) - 1,
                "scope": [rv],
            },
        ))

    # The caller is expected to invoke this only when a constant column exists.
    assert len(result) > 0

    # Only schedule further splitting when something is left to split; an
    # operation over an empty scope would carry no columns at all.
    if informative_rvs:
        prod_node.children.append(None)
        result.append((
            SplittingOperations.GET_NEXT_OP,
            {
                "data": data[:, informative_rvs],
                "parent_id": node_id,
                "pos": len(prod_node.children) - 1,
                "scope": informative_rvs,
            },
        ))

    return prod_node, result