예제 #1
0
def buc(df, pre, df2):
    """Recursively append cube rows for ``df`` to ``df2``.

    Every emitted row is ``pre`` followed by one entry per remaining column,
    where an entry is either a concrete value or the marker ``'ALL'``.

    Args:
        df: pandas DataFrame still to be expanded.
        pre: list of entries already fixed by the callers; it is never
            mutated, each recursive call receives a fresh list.
        df2: output DataFrame; rows are appended in place through ``.loc``.

    Returns:
        None. All results are written into ``df2``.
    """
    n_rows, n_cols = df.shape

    if n_rows == 1:
        # Single-row shortcut: enumerate the keep/'ALL' combinations directly
        # instead of recursing.
        row = list(df.iloc[0])  # materialised once, not once per cell
        # The even numbers in [2**n, 2**(n+1)) print as '0b1' followed by n
        # bits whose last bit is 0, so the final column is always kept.
        for mask in range(2 ** n_cols, 2 ** (n_cols + 1), 2):
            flags = bin(mask)[3:]
            df2.loc[len(df2)] = pre + [
                'ALL' if flag == '1' else value
                for flag, value in zip(flags, row)
            ]

    elif n_cols == 1:
        # One column left: emit the sum of whatever helper.project_data
        # returns for column 0.
        df2.loc[len(df2)] = pre + [sum(helper.project_data(df, 0))]

    else:
        # General case: one branch per distinct value of the first column
        # (in sorted order), then one 'ALL' branch.
        # NOTE(review): slice_data_dim0 / remove_first_dim presumably drop
        # the first column -- confirm against the helper module.
        for val in sorted(set(helper.project_data(df, 0).values)):
            buc(helper.slice_data_dim0(df, val), pre + [val], df2)
        buc(helper.remove_first_dim(df), pre + ['ALL'], df2)
예제 #2
0
    def buc_rec(input, l=None):
        """Recursively collect cube rows for ``input`` into ``ll``.

        ``ll`` is not defined in this function; it is taken from the
        enclosing scope and extended in place.

        Args:
            input: pandas DataFrame still to be expanded.
            l: list of entries already fixed by the callers (defaults to an
                empty prefix); never mutated.

        Returns:
            None. Rows are appended to ``ll``.
        """
        # A None sentinel avoids sharing one default list between calls.
        prefix = [] if l is None else l
        dims = input.shape[1]

        if input.shape[0] == 1 and dims == 3:
            # Single-row shortcut, hard-coded for exactly three columns.
            a = input.iloc[0, 0]
            b = input.iloc[0, 1]
            s = input.iloc[0, 2]
            # NOTE(review): the third column is emitted as-is here, while the
            # one-column branch below emits a "%.1f" string -- confirm the
            # mixed output types are intended.
            ll.extend([
                prefix + [a, b, s],
                prefix + [a, "ALL", s],
                prefix + ["ALL", b, s],
                prefix + ["ALL", "ALL", s],
            ])
            return

        if dims == 1:
            # Only one column left: emit its sum formatted with one decimal.
            total = sum(helper.project_data(input, 0))
            ll.append(prefix + ["%.1f" % total])
            return

        # General case: one branch per distinct value of the first column.
        for dim0_v in set(helper.project_data(input, 0).values):
            buc_rec(helper.slice_data_dim0(input, dim0_v), prefix + [dim0_v])

        # The R_{ALL} branch: recurse on what helper.remove_first_dim returns.
        buc_rec(helper.remove_first_dim(input), prefix + ['ALL'])
예제 #3
0
def _buc_rec_optimized(df, pre_num, df_out):  # help function
    """Recursively append cube rows for ``df`` to ``df_out``.

    Args:
        df: pandas DataFrame still to be expanded.
        pre_num: entries already fixed by the callers. It is read only; the
            caller's list is never modified.
        df_out: output DataFrame; rows are appended in place through ``.loc``.

    Returns:
        None. All results are written into ``df_out``.
    """
    if df.shape[1] == 1:
        # Only one column left: emit prefix + sum as a new row. Building a
        # new list keeps the caller's ``pre_num`` untouched.
        df_out.loc[len(df_out)] = [*pre_num, sum(helper.project_data(df, 0))]
        return

    # General case: one branch per distinct value of the first column. Each
    # branch gets its own freshly built prefix.
    for dim0_v in set(helper.project_data(df, 0).values):
        _buc_rec_optimized(
            helper.slice_data_dim0(df, dim0_v), [*pre_num, dim0_v], df_out
        )

    # The R_{ALL} branch: recurse on what helper.remove_first_dim returns.
    _buc_rec_optimized(helper.remove_first_dim(df), [*pre_num, "ALL"], df_out)
예제 #4
0
def buc_rec_3_params(input, result, pre=None):
    """Recursively accumulate cube rows for ``input`` into ``result``.

    Args:
        input: pandas DataFrame still to be expanded.
        result: accumulator that receives the rows; it is extended in place.
        pre: entries already fixed by the callers (defaults to an empty
            prefix); never mutated.

    Returns:
        The accumulator, so calls can be chained.
    """
    # A None sentinel avoids sharing one default list between calls.
    prefix = [] if pre is None else list(pre)
    rows, dims = input.shape

    if dims >= 2 and rows == 1:
        # Single-row shortcut: hand prefix + row values to single_line_opt,
        # which produces the rows to add.
        combined = prefix + input.iloc[0].tolist()
        result += single_line_opt(tuple(combined), dims)
        return result

    if dims == 1:
        # Only one column left: emit prefix + sum of that column.
        result.append(prefix + [sum(helper.project_data(input, 0))])
        return result

    # General case: one branch per distinct value of the first column.
    for dim0_v in set(helper.project_data(input, 0).values):
        result = buc_rec_3_params(
            helper.slice_data_dim0(input, dim0_v), result, prefix + [dim0_v]
        )

    # The R_{ALL} branch: recurse on what helper.remove_first_dim returns.
    return buc_rec_3_params(
        helper.remove_first_dim(input), result, prefix + ['ALL']
    )
예제 #5
0
def buc(df, result, prefix=None):
    """Recursively accumulate cube rows for ``df`` into ``result``.

    Args:
        df: pandas DataFrame still to be expanded.
        result: list that receives one list per emitted row (appended in
            place).
        prefix: entries already fixed by the callers (defaults to an empty
            prefix); never mutated.

    Returns:
        ``result``, the same object that was passed in.
    """
    # A None sentinel avoids sharing one default list between calls.
    base = [] if prefix is None else list(prefix)

    if df.shape[1] == 1:
        # Only one column left: emit prefix + sum of that column.
        result.append(base + [sum(helper.project_data(df, 0))])
        return result

    # One branch per distinct value of the first column ...
    for value in set(helper.project_data(df, 0).values):
        buc(helper.slice_data_dim0(df, value), result, base + [value])

    # ... followed by the 'ALL' branch on what remove_first_dim returns.
    buc(helper.remove_first_dim(df), result, base + ['ALL'])
    return result
예제 #6
0
def my_buc_rec_optimized(df, pre, res):  # help recursive function
    """Recursively append cube rows for ``df`` to ``res``.

    Args:
        df: pandas DataFrame still to be expanded.
        pre: entries already fixed by the callers. This function itself no
            longer modifies it; what ``single_tuple`` does with it is not
            visible here.
        res: output DataFrame; rows are appended in place through ``.loc``.

    Returns:
        None. All results are written into ``res``.
    """
    if df.shape[0] == 1:
        # Single-row input is delegated to single_tuple.
        single_tuple(df, pre, res)
    elif df.shape[1] == 1:
        # Only one column left: emit prefix + sum as a new row. Building a
        # new list keeps the caller's ``pre`` untouched.
        res.loc[len(res)] = [*pre, sum(helper.project_data(df, 0))]
    else:
        # One branch per distinct value of the first column, each with its
        # own freshly built prefix.
        for val in set(helper.project_data(df, 0).values):
            my_buc_rec_optimized(
                helper.slice_data_dim0(df, val), [*pre, val], res
            )

        # The "ALL" branch on what helper.remove_first_dim returns.
        my_buc_rec_optimized(helper.remove_first_dim(df), [*pre, "ALL"], res)
예제 #7
0
def cal_mul_tuple(df, result, check):
    """Recursively accumulate cube rows for ``df`` into ``result``.

    Args:
        df: pandas DataFrame still to be expanded.
        result: accumulator that receives one entry per emitted row
            (appended in place).
        check: entries already fixed by the callers; every branch works on
            whatever ``copy_list(check)`` returns.

    Returns:
        ``result``, the same object that was passed in.
    """
    trace = copy_list(check)

    if df.shape[1] == 1:
        # Only the measure dim is left: emit trace + its sum.
        trace.append(sum(helper.project_data(df, 0)))
        result.append(trace)
        return result

    # The general case: one branch per distinct value of the first column.
    for value in set(helper.project_data(df, 0).values):
        trace.append(value)
        cal_mul_tuple(helper.slice_data_dim0(df, value), result, trace)
        # Start the next branch from a fresh copy of the incoming prefix.
        trace = copy_list(check)

    # The R_{ALL} branch: recurse on what helper.remove_first_dim returns.
    trace.append('ALL')
    cal_mul_tuple(helper.remove_first_dim(df), result, trace)
    return result
예제 #8
0
def buc_rec_general(df, result, prefix=None):
    """Recursively accumulate cube rows for ``df`` into ``result``.

    Args:
        df: pandas DataFrame still to be expanded.
        result: accumulator that receives the emitted rows (extended in
            place).
        prefix: entries already fixed by the callers (defaults to an empty
            prefix); never mutated.

    Returns:
        ``result``, the accumulator that was passed in.
    """
    # A None sentinel avoids sharing one default list between calls.
    base = [] if prefix is None else list(prefix)
    rows, dims = df.shape

    if dims == 1:
        # Only one column left: emit prefix + sum of that column.
        result.append(base + [sum(helper.project_data(df, 0))])
    elif rows == 1:
        # Single-row shortcut delegated to buc_single. Per the original
        # note, it returns a list of rows: [[...], [...], ...].
        result += buc_single(base, list(df.iloc[0]), dims)
    else:
        # One branch per distinct value of the first column ...
        for dim0_v in set(helper.project_data(df, 0).values):
            buc_rec_general(
                helper.slice_data_dim0(df, dim0_v), result, base + [dim0_v]
            )
        # ... followed by the 'ALL' branch on what remove_first_dim returns.
        buc_rec_general(helper.remove_first_dim(df), result, base + ['ALL'])
    return result
예제 #9
0
def buc(df, df_out, pre=None):
    """Recursively append cube rows for ``df`` to ``df_out``.

    Args:
        df: pandas DataFrame still to be expanded.
        df_out: output DataFrame; rows are appended in place through ``.loc``.
        pre: list of entries already fixed by the callers (defaults to an
            empty prefix). This function no longer appends to or pops from
            it; each recursive call receives a new list.

    Returns:
        Always ``0``.
    """
    # A None sentinel avoids a shared default list: the previous append/pop
    # bookkeeping left that list polluted whenever an exception was raised
    # between the two.
    if pre is None:
        pre = []

    if df.shape[1] == 1:  # one column left
        # Sum the column after converting every entry with int().
        total = sum(int(x) for x in helper.project_data(df, 0))
        df_out.loc[len(df_out)] = pre + [total]
    elif df.shape[0] == 1:
        # Single-row input is delegated to st_opt.
        st_opt(df, df_out, pre)
    else:
        # One branch per distinct value of the first column, in sorted order.
        for dim0_v in sorted(set(helper.project_data(df, 0).values)):
            buc(helper.slice_data_dim0(df, dim0_v), df_out, pre + [dim0_v])
        # The "ALL" branch on what helper.remove_first_dim returns.
        buc(helper.remove_first_dim(df), df_out, pre + ["ALL"])
    return 0
예제 #10
0
import pandas as pd