Esempio n. 1
0
def _statistic_summary(table,
                       input_cols,
                       statistics,
                       column_indices=None,
                       percentile_amounts=None,
                       trimmed_mean_amounts=None):
    """Summarise selected columns of a row-oriented table.

    The table is transposed from rows to columns, then one output row is
    built per entry of ``input_cols``: the column label followed by one value
    for every requested statistic, in the order given by ``statistics``.

    Args:
        table: Iterable of rows (each row an iterable of cell values). It is
            only read, never modified.
        input_cols: Sequence of column labels; each label becomes the first
            element of its output row.
        statistics: Iterable of statistic keywords. Recognised keywords are
            'max', 'min', 'range', 'sum', 'avg', 'variance', 'stddev',
            'skewness', 'kurtosis', 'nrow', 'num_of_value', 'null_count',
            'mode', 'median', 'q1', 'q3', 'iqr', 'percentile' and
            'trimmed_mean'. Any other keyword is silently ignored.
        column_indices: ``column_indices[i]`` is the position inside each row
            of the column labelled ``input_cols[i]``. When omitted (None),
            ``input_cols[i]`` is read from position ``i``.
        percentile_amounts: Amounts handed to ``brtc_stats.percentile`` after
            de-duplication by ``_unique_list``; one value is appended per
            amount. The 'percentile' keyword is skipped when this is None.
        trimmed_mean_amounts: Same as ``percentile_amounts`` but for
            ``brtc_stats.trimmed_mean`` / the 'trimmed_mean' keyword.

    Returns:
        dict: ``{'out_table': rows}`` where ``rows`` is a list containing one
        list per input column.
    """
    # Statistic keyword -> name of the one-argument ``brtc_stats`` function.
    # Names are resolved lazily with getattr so only the statistics that are
    # actually requested are looked up.
    stat_functions = {
        'max': 'max',
        'min': 'min',
        'range': 'range',
        'sum': 'sum',
        'avg': 'mean',
        'variance': 'var_samp',
        'stddev': 'std',
        'skewness': 'skewness',
        'kurtosis': 'kurtosis',
        'nrow': 'num_row',
        'num_of_value': 'num_value',
        'null_count': 'num_null',
        'mode': 'mode',
        'median': 'median',
        'q1': 'q1',
        'q3': 'q3',
        'iqr': 'iqr',
    }

    # The default used to crash on the first lookup (None is not
    # subscriptable); fall back to positional column indices instead.
    if column_indices is None:
        column_indices = range(len(input_cols))

    # Transposing builds brand-new lists, so the caller's table is never
    # touched and no defensive copy is required.
    columns = [list(column) for column in zip(*table)]

    result = []
    for i, col_name in enumerate(input_cols):
        row = [col_name]
        for st in statistics:
            func_name = stat_functions.get(st)
            if func_name is not None:
                stat_func = getattr(brtc_stats, func_name)
                row.append(stat_func(columns[column_indices[i]]))
            elif st == 'percentile' and percentile_amounts is not None:
                for pa in _unique_list(percentile_amounts):
                    row.append(
                        brtc_stats.percentile(columns[column_indices[i]], pa))
            elif st == 'trimmed_mean' and trimmed_mean_amounts is not None:
                for ta in _unique_list(trimmed_mean_amounts):
                    row.append(
                        brtc_stats.trimmed_mean(columns[column_indices[i]],
                                                ta))
        result.append(row)
    return {'out_table': result}
Esempio n. 2
0
def _statistic_summary_original(table, input_cols, statistics, percentile_amounts=None, trimmed_mean_amounts=None):
    """Build a per-column statistics summary as a pandas DataFrame.

    A copy of ``table`` is taken with ``table.copy()`` and every statistic is
    computed on columns looked up from that copy by name.

    Args:
        table: Object offering ``.copy()`` and column lookup ``table[name]``
            (presumably a pandas DataFrame - confirm with callers).
        input_cols: Column names to summarise; stored as the 'column_name'
            column of the result.
        statistics: Iterable of statistic keywords. Each recognised keyword
            adds one result column; unrecognised keywords add nothing.
            'nrow' is reported under the column name 'num_of_row'.
        percentile_amounts: Amounts for the 'percentile' keyword, after
            de-duplication by ``_unique_list``; one 'percentile_<amount>'
            column per amount. Ignored when None.
        trimmed_mean_amounts: Amounts for the 'trimmed_mean' keyword, after
            de-duplication by ``_unique_list``; one 'trimmed_mean_<amount>'
            column per amount. Ignored when None.

    Returns:
        dict: ``{'out_table': DataFrame}`` with one row per input column.
    """
    frame = table.copy()

    def _per_column(func_name, *extra, wrap=None):
        # Evaluate brtc_stats.<func_name> on each requested column, in order,
        # optionally converting every individual result with ``wrap``.
        values = []
        for col in input_cols:
            value = getattr(brtc_stats, func_name)(frame[col], *extra)
            values.append(value if wrap is None else wrap(value))
        return values

    # (keyword, output column, brtc_stats function, per-value converter),
    # listed in the order in which the keywords are checked.
    plain_stats = (
        ('max', 'max', 'max', None),
        ('min', 'min', 'min', None),
        ('range', 'range', 'range', None),
        ('sum', 'sum', 'sum', None),
        ('avg', 'avg', 'mean', None),
        ('variance', 'variance', 'var_samp', None),
        ('stddev', 'stddev', 'std', None),
        ('skewness', 'skewness', 'skewness', None),
        ('kurtosis', 'kurtosis', 'kurtosis', None),
        ('nrow', 'num_of_row', 'num_row', None),
        ('num_of_value', 'num_of_value', 'num_value', None),
        ('null_count', 'null_count', 'num_null', None),
        ('mode', 'mode', 'mode', list),
        ('median', 'median', 'median', None),
        ('q1', 'q1', 'q1', None),
        ('q3', 'q3', 'q3', None),
        ('iqr', 'iqr', 'iqr', None),
    )

    summary = {'column_name': input_cols}
    for requested in statistics:
        for keyword, out_name, func_name, converter in plain_stats:
            if keyword == requested:
                summary[out_name] = _per_column(func_name, wrap=converter)

        if 'percentile' == requested and percentile_amounts is not None:
            for amount in _unique_list(percentile_amounts):
                # The column name is computed before the values on purpose,
                # keeping the helper call order unchanged.
                column_label = 'percentile_{}'.format(_amounts_colname(amount))
                summary[column_label] = _per_column('percentile', amount)

        if 'trimmed_mean' == requested and trimmed_mean_amounts is not None:
            for amount in _unique_list(trimmed_mean_amounts):
                column_label = 'trimmed_mean_{}'.format(_amounts_colname(amount))
                summary[column_label] = _per_column('trimmed_mean', amount)

    return {'out_table': pd.DataFrame(summary)}
Esempio n. 3
0
def _statistic_summary(table,
                       input_cols,
                       statistics,
                       percentile_amounts=None,
                       trimmed_mean_amounts=None):
    """Build a per-column statistics summary as a pandas DataFrame.

    A copy of ``table`` is taken with ``table.copy()`` and every statistic is
    computed on columns looked up from that copy by name.

    Args:
        table: Object offering ``.copy()`` and column lookup ``table[name]``
            (presumably a pandas DataFrame - confirm with callers).
        input_cols: Column names to summarise; stored as the 'column_name'
            column of the result.
        statistics: Iterable of statistic keywords. Recognised keywords are
            'max', 'min', 'range', 'sum', 'avg', 'variance', 'stddev',
            'skewness', 'kurtosis', 'nrow', 'num_of_value', 'null_count',
            'mode', 'median', 'q1', 'q3', 'iqr', 'percentile' and
            'trimmed_mean'. Each plain keyword adds a result column of the
            same name; any other keyword is silently ignored.
        percentile_amounts: Amounts for the 'percentile' keyword. They are
            de-duplicated and sorted with ``np.unique`` and produce one
            'percentile_<amount>' column each ('.' in the amount is replaced
            by '_'). None or an empty sequence produces no column.
        trimmed_mean_amounts: Same as ``percentile_amounts`` but for the
            'trimmed_mean' keyword and 'trimmed_mean_<amount>' columns.

    Returns:
        dict: ``{'out_table': DataFrame}`` with one row per input column.
    """
    _table = table.copy()

    # None (the default, matching the sibling summary functions) means "no
    # amounts". Passing None straight to np.unique would yield [None] and a
    # bogus 'percentile_None' / 'trimmed_mean_None' column.
    percentile_amounts = np.unique(
        [] if percentile_amounts is None else percentile_amounts)
    trimmed_mean_amounts = np.unique(
        [] if trimmed_mean_amounts is None else trimmed_mean_amounts)

    def _amounts_colname(a):
        # Make the amount usable in a column name, e.g. 0.1 -> '0_1'.
        return str(a).replace('.', '_')

    def _per_column(func_name, *extra):
        # Evaluate brtc_stats.<func_name> on each requested column, in order.
        return [
            getattr(brtc_stats, func_name)(_table[x], *extra)
            for x in input_cols
        ]

    # Statistic keyword (also the output column name) -> name of the
    # one-argument ``brtc_stats`` function.
    # NOTE: a 'nan_count' statistic (brtc_stats.num_nan) was commented out in
    # the previous implementation and remains unsupported.
    stat_functions = {
        'max': 'max',
        'min': 'min',
        'range': 'range',
        'sum': 'sum',
        'avg': 'mean',
        'variance': 'var_samp',
        'stddev': 'std',
        'skewness': 'skewness',
        'kurtosis': 'kurtosis',
        'nrow': 'num_row',
        'num_of_value': 'num_value',
        'null_count': 'num_null',
        'mode': 'mode',
        'median': 'median',
        'q1': 'q1',
        'q3': 'q3',
        'iqr': 'iqr',
    }

    data = {'column_name': input_cols}
    for st in statistics:
        func_name = stat_functions.get(st)
        if func_name is not None:
            data[st] = _per_column(func_name)
        elif st == 'percentile':
            for pa in percentile_amounts:
                pa_colname = 'percentile_{}'.format(_amounts_colname(pa))
                data[pa_colname] = _per_column('percentile', pa)
        elif st == 'trimmed_mean':
            for ta in trimmed_mean_amounts:
                ta_colname = 'trimmed_mean_{}'.format(_amounts_colname(ta))
                data[ta_colname] = _per_column('trimmed_mean', ta)

    result = pd.DataFrame(data)

    return {'out_table': result}