예제 #1
0
파일: summary.py 프로젝트: 4sp1r3/bamboo
def summarize(dataset, dframe, groups, no_cache, update=False):
    """Return summary stats for `dframe`, optionally grouped, using a cache.

    Stats are looked up in `dataset.stats` under a key built from `groups`
    (or `dataset.ALL` when that key is empty).  They are recomputed when
    `no_cache` is set, when nothing truthy is cached under the key, or when
    `update` is set.

    :param dataset: Dataset providing `is_factor`, `join_groups`, `stats`,
        `update`, and the `ALL` / `STATS` constants.
    :param dframe: Data passed to the summarizing helpers.
    :param groups: Column names to group by; may be empty.
    :param no_cache: When true, always recompute and do not store the result.
    :param update: When true (and caching is enabled), recompute and merge
        the fresh stats into the cached ones via `combine_dicts`.

    :raises: `ColumnTypeError` if grouping on a non-dimensional column.

    :returns: The summary dict; wrapped as `{group key: summary}` when
        `groups` is non-empty.
    """
    # Grouping is only allowed on factor columns, never numeric ones.
    for column in groups:
        if dataset.is_factor(column):
            continue
        raise ColumnTypeError("group: '%s' is not a dimension." % column)

    cache_key = dataset.join_groups(groups) or dataset.ALL

    cached = dataset.stats
    summary = cached.get(cache_key)

    if no_cache or not summary or update:
        if groups:
            summary = summarize_with_groups(dframe, groups, dataset)
        else:
            summary = summarize_df(dframe, dataset)

        if not no_cache:
            if update:
                # Merge the fresh stats into whatever was cached before.
                summary = combine_dicts(cached.get(cache_key, {}), summary)

            cached.update({cache_key: summary})
            dataset.update({dataset.STATS: dict_for_mongo(cached)})

    result = dict_from_mongo(summary)

    return {cache_key: result} if groups else result
예제 #2
0
def summarize(dataset, dframe, groups, no_cache, update=False):
    """Summarize `dframe` (per group if `groups` is given), caching results.

    The cache lives in `dataset.stats`, keyed by
    `dataset.join_groups(groups)` or `dataset.ALL` when that is empty.
    A fresh summary is computed if `no_cache` is true, if no truthy entry
    is cached for the key, or if `update` is true.  Unless `no_cache` is
    true, the result is stored back in the cache and saved with
    `dataset.update`; with `update` it is first merged into the previously
    cached entry using `combine_dicts`.

    :param dataset: Dataset whose stats cache is read and written.
    :param dframe: Data handed to `summarize_df` / `summarize_with_groups`.
    :param groups: Names of the columns to group on; may be empty.
    :param no_cache: Skip storing (and force recomputing) when true.
    :param update: Force recomputing and merge into cached stats when true.

    :raises: `ColumnTypeError` if grouping on a non-dimensional column.

    :returns: The summary, nested under the group key when `groups` is
        non-empty.
    """
    # Refuse to group by anything that is not a factor (e.g. numeric types).
    for candidate in groups:
        is_dimension = dataset.is_factor(candidate)
        if not is_dimension:
            raise ColumnTypeError(
                "group: '%s' is not a dimension." % candidate)

    key = dataset.join_groups(groups) or dataset.ALL

    all_stats = dataset.stats
    current = all_stats.get(key)

    if no_cache or not current or update:
        if groups:
            current = summarize_with_groups(dframe, groups, dataset)
        else:
            current = summarize_df(dframe, dataset)

        if not no_cache:
            if update:
                previous = all_stats.get(key, {})
                current = combine_dicts(previous, current)

            all_stats.update({key: current})
            persisted = dict_for_mongo(all_stats)
            dataset.update({dataset.STATS: persisted})

    summary = dict_from_mongo(current)

    if groups:
        return {key: summary}

    return summary
예제 #3
0
def summarize(dataset, dframe, groups, group_str, no_cache):
    """Return summary stats for `dframe` under the cache key `group_str`.

    Stats are recomputed when `no_cache` is true or when `dataset.stats`
    holds nothing truthy for `group_str`.  The recomputed stats are always
    placed in the in-memory stats dict; they are only saved through
    `dataset.update` when `no_cache` is false.

    :param dataset: Dataset providing `schema`, `stats`, `update`, and the
        `ALL` / `STATS` constants.
    :param dframe: Data passed to the summarizing helpers.
    :param groups: Column names to group by; entries equal to `dataset.ALL`
        are not checked against the schema.
    :param group_str: Cache key; `dataset.ALL` selects the ungrouped summary.
    :param no_cache: When true, always recompute and skip saving.

    :raises: `ColumnTypeError` if grouping on a non-dimensional column.

    :returns: The summary itself when `group_str` equals `dataset.ALL`,
        otherwise `{group_str: summary}`.
    """
    # Only dimension columns may be grouped on; numeric types are rejected.
    for column in groups:
        if column != dataset.ALL:
            if not dataset.schema.is_dimension(column):
                raise ColumnTypeError(
                    "group: '%s' is not a dimension." % column)

    # Reuse cached stats for this key unless missing or caching is disabled.
    cached = dataset.stats
    if no_cache or not cached.get(group_str):
        if group_str == dataset.ALL:
            fresh = summarize_df(dframe, dataset=dataset)
        else:
            fresh = summarize_with_groups(dframe, groups, dataset)

        cached.update({group_str: fresh})

        if not no_cache:
            dataset.update({dataset.STATS: dict_for_mongo(cached)})

    summary = dict_from_mongo(cached.get(group_str))

    if group_str == dataset.ALL:
        return summary

    return {group_str: summary}