Exemple #1
0
def test_panel_aggregation():
    """Custom aggregation on a time-grouped Panel must equal the built-in mean.

    Builds a random 2 x 100 x 4 Panel whose major axis is a 100-period date
    range, groups it with ``TimeGrouper('M', axis=1)``, and compares the
    grouped ``mean()`` against ``agg`` with a function that averages each
    group over axis 1.
    """
    dates = pd.date_range('1/1/2000', periods=100)
    values = np.random.randn(2, len(dates), 4)
    panel = Panel(
        values,
        items=['Item1', 'Item2'],
        major_axis=dates,
        minor_axis=['A', 'B', 'C', 'D'],
    )

    def mean_over_axis1(chunk):
        # Every group handed to the aggregator should itself be a Panel.
        assert isinstance(chunk, Panel)
        return chunk.mean(1)

    # Only the middle element of the returned triple is needed here.
    _, grouper, _ = TimeGrouper('M', axis=1)._get_grouper(panel)
    by_bin = panel.groupby(grouper)

    expected = by_bin.mean()
    actual = by_bin.agg(mean_over_axis1)
    tm.assert_panel_equal(actual, expected)
def test_panel_aggregation():
    """Verify ``agg`` with a user function agrees with ``mean`` on a Panel.

    The Panel holds random data (2 items, 100 dates, 4 minor labels) and is
    grouped through the grouper produced by ``TimeGrouper('M', axis=1)``.
    """
    item_names = ['Item1', 'Item2']
    column_names = ['A', 'B', 'C', 'D']
    index = pd.date_range('1/1/2000', periods=100)
    raw = np.random.randn(2, len(index), 4)

    wide_panel = Panel(raw, items=item_names, major_axis=index,
                       minor_axis=column_names)

    time_grouper = TimeGrouper('M', axis=1)
    # The grouper of interest is the second of the three returned values.
    _, grouper, _ = time_grouper._get_grouper(wide_panel)
    groups = wide_panel.groupby(grouper)
    builtin_mean = groups.mean()

    def averaged(piece):
        # The aggregation callback must receive Panel objects.
        assert isinstance(piece, Panel)
        return piece.mean(1)

    aggregated = groups.agg(averaged)
    tm.assert_panel_equal(aggregated, builtin_mean)
def Regroup(groupinfo, labels, *args):
    """
    Modify the 3D numpy arrays in *args so that data is grouped
    according to user specifications.

    For example, presume that the following scenarios are given:

    Fast_Down_01
    Slow_Down_01
    Fast_Down_02
    Slow_Down_02
    Fast_Down_04
    Slow_Down_04
    Fast_Down_08
    Slow_Down_08

    And only a single track run is specified: SCIT.
    In this example, the third item, skill scores, can be arbitrary.
    Now, suppose that we want to display the result data such that the x-axis
    is for the Down* and there are two plots: one for Fast and one for Slow.

    So, we group the scenarios data by some key (discussed later) _into_
    the trackruns dimension. For this reason, the data dimension being
    grouped into (in this case, trackruns) must originally be singleton.

    *groupinfo* - dict with keys "group", "into", and "by", or None.
        The "group" element states which dimension the grouping will occur on.
        The "into" element states along which dimension the groups will be
        stacked. These two elements can have values of "scenarios", "skills",
        or "trackruns".

        The "by" element is rudimentary for now, but it controls the key value
        function used for grouping. The key function is applied to the list of
        default labels for the dimension stated for "group". The unique set of
        keys generated by the function on these labels become the new default
        labels for the "into" dimension.

        Currently, the keyfunction is hard-coded to split the label by under-
        scores and search for the string given in "by" in the resulting list.
        It then returns the list's next value. So, in the above example, the
        new labels for the "trackruns" dimension would be "01", "02", "04", and
        "08".

    *labels* - dict with the keys "scenarios", "skills" and "trackruns",
        each holding the labels for that dimension. It is modified in place:
        the "into" entry is replaced by the tuple of group keys and the
        "group" entry by the labels with the "by" variable and its value
        removed.

    *args* - the arrays to regroup. Each is wrapped in a Panel with
        scenarios as items, skills as the major axis and trackruns as the
        minor axis.

    Returns *args* untouched (as a tuple) when *groupinfo* is None or no
    arrays were given; otherwise returns a list holding one regrouped array
    per input array.

    Raises ValueError if the "into" dimension is not singleton, if "group"
    and "into" name the same dimension, if the grouping produces no groups,
    or if the truncated labels differ from one group to the next.
    """
    if groupinfo is None:
        return args

    if len(args) == 0:
        return args

    into_dim = groupinfo['into']
    if len(labels[into_dim]) != 1:
        raise ValueError("Dim %s is not singleton!" % into_dim)

    group_dim = groupinfo['group']
    if group_dim == into_dim:
        raise ValueError("Can not group %s dimension into itself!" %
                         group_dim)

    # NOTE: Panel was removed in pandas 0.25, so this code path needs an
    # older pandas release.
    from pandas import Panel

    grpAxis = dataAxes[group_dim]
    intoAxis = dataAxes[into_dim]

    # !!Temporary!! restricted functionality for just trackrun variables
    def keyfunc(label):
        return _varval(label, groupinfo['by'])

    g_args = []
    for a in args:
        wp = Panel(a,
                   items=labels['scenarios'],
                   major_axis=labels['skills'],
                   minor_axis=labels['trackruns'])

        grouped = wp.groupby(keyfunc, axis=grpAxis)

        if len(grouped) == 0:
            raise ValueError("Grouping didn't result in anything!")

        # Iterating the groupby yields (key, sub-Panel) pairs.
        intolabs, sub_panels = zip(*grouped)

        # np.concatenate needs a real sequence of arrays; a bare map() is a
        # one-shot iterator on Python 3 and gets rejected, so build a list.
        g_args.append(np.concatenate([panel.values for panel in sub_panels],
                                     axis=intoAxis))

    # *intolabs* and *grouped* come from the last array processed; every
    # array was wrapped with the same labels and grouped with the same key.
    labels[into_dim] = intolabs

    # Do the full set for error-checking purposes
    trunclabs = None
    for intolab in intolabs:
        # TODO: Generalize this!
        # Take some original labels and remove the variable and its value that
        # were used to make *intolabs*
        tmp = [
            '_'.join(_remove_varval(lab.split('_'), groupinfo['by']))
            for lab in grouped.groups[intolab]
        ]
        if trunclabs is None:
            trunclabs = tmp
        elif tmp != trunclabs:
            raise ValueError("The labels do not match! %s\n%s" %
                             (trunclabs, tmp))

    labels[group_dim] = trunclabs

    return g_args
def Regroup(groupinfo, labels, *args):
    """
    Modify the 3D numpy arrays in *args so that data is grouped
    according to user specifications.

    For example, presume that the following scenarios are given:

    Fast_Down_01
    Slow_Down_01
    Fast_Down_02
    Slow_Down_02
    Fast_Down_04
    Slow_Down_04
    Fast_Down_08
    Slow_Down_08

    And only a single track run is specified: SCIT.
    In this example, the third item, skill scores, can be arbitrary.
    Now, suppose that we want to display the result data such that the x-axis
    is for the Down* and there are two plots: one for Fast and one for Slow.

    So, we group the scenarios data by some key (discussed later) _into_
    the trackruns dimension. For this reason, the data dimension being
    grouped into (in this case, trackruns) must originally be singleton.

    *groupinfo* - dict with keys "group", "into", and "by", or None.
        The "group" element states which dimension the grouping will occur on.
        The "into" element states along which dimension the groups will be
        stacked. These two elements can have values of "scenarios", "skills",
        or "trackruns".

        The "by" element is rudimentary for now, but it controls the key value
        function used for grouping. The key function is applied to the list of
        default labels for the dimension stated for "group". The unique set of
        keys generated by the function on these labels become the new default
        labels for the "into" dimension.

        Currently, the keyfunction is hard-coded to split the label by under-
        scores and search for the string given in "by" in the resulting list.
        It then returns the list's next value. So, in the above example, the
        new labels for the "trackruns" dimension would be "01", "02", "04", and
        "08".

    *labels* - dict keyed by "scenarios", "skills" and "trackruns" giving the
        labels of each dimension. This dict is updated in place: its "into"
        entry becomes the tuple of group keys, and its "group" entry becomes
        the labels stripped of the "by" variable and its value.

    *args* - arrays to regroup; each one is wrapped in a Panel using
        scenarios for items, skills for the major axis and trackruns for the
        minor axis.

    Returns the *args* tuple unchanged if *groupinfo* is None or if no arrays
    were passed. Otherwise returns a list with one regrouped array for each
    input array.

    Raises ValueError when the "into" dimension does not have exactly one
    label, when "group" equals "into", when grouping yields no groups, or
    when the stripped labels are not the same for every group.
    """
    if groupinfo is None:
        return args

    if len(args) == 0:
        return args

    if len(labels[groupinfo['into']]) != 1:
        raise ValueError("Dim %s is not singleton!" % groupinfo['into'])

    if groupinfo['group'] == groupinfo['into']:
        raise ValueError("Can not group %s dimension into itself!" %
                         groupinfo['group'])

    # NOTE: pandas dropped Panel in release 0.25; an older pandas is
    # required for this function to run.
    from pandas import Panel

    grpAxis = dataAxes[groupinfo['group']]
    intoAxis = dataAxes[groupinfo['into']]

    # !!Temporary!! restricted functionality for just trackrun variables
    def keyfunc(label):
        return _varval(label, groupinfo['by'])

    g_args = []
    for a in args:
        wp = Panel(a, items=labels['scenarios'],
                   major_axis=labels['skills'],
                   minor_axis=labels['trackruns'])

        grouped = wp.groupby(keyfunc, axis=grpAxis)

        if len(grouped) == 0:
            raise ValueError("Grouping didn't result in anything!")

        intolabs, g_a = zip(*grouped)
        # Get a list of numpy arrays from the tuple of Panels. This must be
        # a real list: on Python 3 map() returns an iterator, which
        # np.concatenate does not accept.
        g_a = np.concatenate([grp.values for grp in g_a], axis=intoAxis)

        g_args.append(g_a)

    # The keys (and *grouped*) left over from the final loop pass are reused
    # below; all arrays share the same labels and key function.
    labels[groupinfo['into']] = intolabs

    # Do the full set for error-checking purposes
    trunclabs = None
    for intolab in intolabs:
        # TODO: Generalize this!
        # Take some original labels and remove the variable and its value that
        # were used to make *intolabs*
        tmp = ['_'.join(_remove_varval(lab.split('_'), groupinfo['by']))
               for lab in grouped.groups[intolab]]
        if trunclabs is None:
            # The first group sets the reference that the rest must match.
            trunclabs = tmp
        elif tmp != trunclabs:
            raise ValueError("The labels do not match! %s\n%s" %
                             (trunclabs, tmp))

    labels[groupinfo['group']] = trunclabs

    return g_args