def naive_factorization(data=None,
                        node_id=0,
                        context=None,
                        scope=None,
                        **kwargs):
    """Create a product node with one pending leaf task per variable in ``scope``.

    Args:
        data: Forwarded to ``get_YX`` together with ``context.feature_size``.
        node_id: Id assigned to the new product node.
        context: Object exposing a ``feature_size`` attribute. It is
            dereferenced unconditionally, so the ``None`` default is not
            usable in practice.
        scope: Variables to factorize; must not be ``None``. The i-th entry
            is paired with the i-th column of the first array returned by
            ``get_YX``.
        **kwargs: Accepted and ignored.

    Returns:
        A ``(product_node, tasks)`` tuple. ``tasks`` holds one
        ``(SplittingOperations.CREATE_LEAF_NODE, params)`` pair per variable,
        where ``params`` carries the data slice, the parent id, the child
        position and a single-variable scope.

    Raises:
        AssertionError: If ``scope`` is ``None``.
    """
    assert scope is not None, "No scope"

    factorized = Product()
    factorized.scope = scope
    factorized.id = node_id

    # NOTE(review): presumably (targets, conditioning features) -- confirm
    # against get_YX.
    targets, features = get_YX(data, context.feature_size)

    tasks = []
    for column, variable in enumerate(scope):
        # Reserve a placeholder child; its index is reported as "pos".
        factorized.children.append(None)
        leaf_data = concatenate_yx(targets[:, column].reshape(-1, 1), features)
        leaf_params = {
            "data": leaf_data,
            "parent_id": factorized.id,
            "pos": len(factorized.children) - 1,
            "scope": [variable],
        }
        tasks.append((SplittingOperations.CREATE_LEAF_NODE, leaf_params))

    return factorized, tasks
 def _deserialize_product(self, node, node_map):
     """Rebuild a ``Product`` from its serialized form.

     ``node.product.children`` supplies the child ids; each one is looked up
     in ``node_map``. The resulting ``Product`` receives ``node.id`` as its id.

     Raises:
         AssertionError: If any child id has no entry in ``node_map``.
     """
     # Look up every referenced child; unknown ids come back as None.
     resolved = []
     for child_id in node.product.children:
         resolved.append(node_map.get(child_id))
     # Every child must have been resolved before the node can be built.
     assert None not in resolved, "Child node ID could not be resolved"
     rebuilt = Product(children=resolved)
     rebuilt.id = node.id
     return rebuilt
def remove_non_informative_features(data=None,
                                    node_id=0,
                                    scope=None,
                                    context=0,
                                    uninformative_features_idx=None,
                                    **kwargs):
    """Split flagged (uninformative) variables off into their own leaves.

    A product node is created. Every variable whose flag in
    ``uninformative_features_idx`` is truthy gets a ``CREATE_LEAF_NODE`` task;
    all remaining variables, if any, are forwarded together in a single
    ``GET_NEXT_OP`` task.

    Args:
        data: Forwarded to ``get_YX`` together with ``context.feature_size``.
        node_id: Id assigned to the new product node.
        scope: Sequence of variables, indexed by the same positions as
            ``uninformative_features_idx``.
        context: Object exposing a ``feature_size`` attribute. It is
            dereferenced unconditionally, so the default of ``0`` is not
            usable in practice.
        uninformative_features_idx: Iterable of per-variable flags; a truthy
            flag marks the variable at that position as uninformative. Must
            not be ``None``.
        **kwargs: Accepted and ignored.

    Returns:
        A ``(product_node, tasks)`` tuple, where ``tasks`` is a list of
        ``(SplittingOperations.<op>, params)`` pairs.

    Raises:
        AssertionError: If ``uninformative_features_idx`` is ``None`` or if
            no variable is flagged as uninformative.
    """
    assert uninformative_features_idx is not None, "parameter uninformative_features_idx can't be None"

    prod_node = Product()
    prod_node.scope = scope
    prod_node.id = node_id

    # NOTE(review): presumably (targets, conditioning features) -- confirm
    # against get_YX.
    y, x = get_YX(data, context.feature_size)

    non_zero_variance_rvs = []
    non_zero_variance_idx = []
    result = []
    for idx, zero_var in enumerate(uninformative_features_idx):
        rv = scope[idx]

        if not zero_var:
            non_zero_variance_rvs.append(rv)
            non_zero_variance_idx.append(idx)
            continue

        # Reserve a placeholder child; its index is reported as "pos".
        prod_node.children.append(None)
        data_slice = concatenate_yx(y[:, idx].reshape(-1, 1), x)
        result.append((
            SplittingOperations.CREATE_LEAF_NODE,
            {
                "data": data_slice,
                "parent_id": prod_node.id,
                "pos": len(prod_node.children) - 1,
                "scope": [rv],
            },
        ))

    # This operation is only meaningful when something was split off.
    assert len(result) > 0

    if non_zero_variance_idx:
        prod_node.children.append(None)
        # Index ``y`` (not the raw ``data``) so the remaining columns are
        # selected exactly the same way as the leaf slices above; the
        # positions in ``non_zero_variance_idx`` refer to columns of ``y``.
        remaining = concatenate_yx(y[:, non_zero_variance_idx], x)
        result.append((
            SplittingOperations.GET_NEXT_OP,
            {
                "data": remaining,
                "parent_id": prod_node.id,
                "pos": len(prod_node.children) - 1,
                "scope": non_zero_variance_rvs,
            },
        ))

    return prod_node, result
def create_product(data=None,
                   node_id=0,
                   parent_id=0,
                   pos=0,
                   context=None,
                   scope=None,
                   split_cols=None,
                   **kwargs):
    """Try to split the columns of ``data`` and build a product node over the parts.

    Args:
        data: Data handed to ``split_cols``.
        node_id: Id assigned to the product node if a split is found.
        parent_id: Parent id echoed back when no split is found.
        pos: Child position echoed back when no split is found.
        context: Passed through to ``split_cols`` unchanged.
        scope: Variables covered by ``data``; must not be ``None``.
        split_cols: Callable invoked as ``split_cols(data, context, scope)``.
            It must return a sized collection of 3-item entries whose first
            two items are a data slice and a scope list.
        **kwargs: Accepted and ignored.

    Returns:
        ``(None, tasks)`` when ``split_cols`` yields exactly one slice; the
        single task re-queues the unchanged data for the original parent
        with ``"no_independencies": True``. Otherwise ``(product_node,
        tasks)`` with one ``GET_NEXT_OP`` task per slice.

    Raises:
        AssertionError: If ``split_cols`` or ``scope`` is ``None``, or if a
            returned scope slice is not a ``list``.
    """
    assert split_cols is not None, "No split_cols lambda"
    assert scope is not None, "No scope"
    partitions = split_cols(data, context, scope)

    if len(partitions) == 1:
        # Nothing could be separated: hand the same data back to the caller's
        # parent slot and flag that no independencies were found.
        passthrough = {
            "data": data,
            "parent_id": parent_id,
            "pos": pos,
            "no_independencies": True,
            "scope": scope,
        }
        return None, [(SplittingOperations.GET_NEXT_OP, passthrough)]

    product = Product()
    product.scope.extend(scope)
    product.id = node_id

    tasks = []
    for part_data, part_scope, _ in partitions:
        assert isinstance(part_scope, list), "slice must be a list"

        # Reserve a placeholder child; its index is reported as "pos".
        product.children.append(None)
        child_params = {
            "data": part_data,
            "parent_id": node_id,
            "pos": len(product.children) - 1,
            "scope": part_scope,
        }
        tasks.append((SplittingOperations.GET_NEXT_OP, child_params))

    return product, tasks
def remove_non_informative_features(data=None,
                                    node_id=0,
                                    scope=None,
                                    **kwargs):
    """Split zero-variance variables off into their own leaves.

    A product node is created. Every variable in ``scope`` whose column in
    ``data`` has zero variance gets a ``CREATE_LEAF_NODE`` task; the
    remaining variables, if any, are forwarded together in a single
    ``GET_NEXT_OP`` task.

    Args:
        data: 2-D array; it is column-indexed by the entries of ``scope``.
        node_id: Id assigned to the new product node and used as the
            ``parent_id`` of every emitted task.
        scope: Sequence of column indices (variables) to examine.
        **kwargs: Accepted and ignored.

    Returns:
        A ``(product_node, tasks)`` tuple, where ``tasks`` is a list of
        ``(SplittingOperations.<op>, params)`` pairs.

    Raises:
        AssertionError: If no variable in ``scope`` has zero variance.
    """
    prod_node = Product()
    prod_node.scope = scope
    prod_node.id = node_id

    # One boolean per scope entry: True where the column is constant.
    uninformative_features_idx = np.var(data[:, scope], 0) == 0

    # Collect the surviving variables directly instead of deleting from a
    # copy of ``scope`` by original position: positions shift after the first
    # deletion, which removed the wrong variable or raised IndexError.
    informative_rvs = []
    result = []
    for rv, zero_var in zip(scope, uninformative_features_idx):
        if not zero_var:
            informative_rvs.append(rv)
            continue

        # Reserve a placeholder child; its index is reported as "pos".
        prod_node.children.append(None)
        result.append((
            SplittingOperations.CREATE_LEAF_NODE,
            {
                "data": data[:, rv].reshape(-1, 1),
                "parent_id": node_id,
                "pos": len(prod_node.children) - 1,
                "scope": [rv],
            },
        ))

    # This operation is only meaningful when something was split off.
    assert len(result) > 0

    # Only continue learning on the remainder when there is one; an empty
    # scope would otherwise be queued as a task.
    if informative_rvs:
        prod_node.children.append(None)
        result.append((
            SplittingOperations.GET_NEXT_OP,
            {
                "data": data[:, informative_rvs],
                "parent_id": node_id,
                "pos": len(prod_node.children) - 1,
                "scope": informative_rvs,
            },
        ))
    return prod_node, result