def test_panel_aggregation():
    """Check that ``agg`` with a per-group mean matches the built-in ``mean``.

    A two-item Panel of random values over 100 dates is grouped along
    axis 1 with a ``TimeGrouper('M')``.  Aggregating each group with a
    function that averages over axis 1 must give the same Panel as calling
    ``mean()`` on the grouped object directly.
    """
    def axis_mean(chunk):
        # Every group handed to the aggregator must itself be a Panel.
        assert (isinstance(chunk, Panel))
        return chunk.mean(1)

    dates = pd.date_range('1/1/2000', periods=100)
    values = np.random.randn(2, len(dates), 4)
    panel = Panel(
        values,
        items=['Item1', 'Item2'],
        major_axis=dates,
        minor_axis=['A', 'B', 'C', 'D'],
    )

    time_grouper = TimeGrouper('M', axis=1)
    # Only the middle element of the returned triple is needed here.
    _, bin_grouper, _ = time_grouper._get_grouper(panel)
    by_bin = panel.groupby(bin_grouper)

    expected = by_bin.mean()
    actual = by_bin.agg(axis_mean)

    tm.assert_panel_equal(actual, expected)
def Regroup(groupinfo, labels, *args):
    """
    Modify the 3D numpy arrays in *args so that data is grouped according
    to user specifications.

    For example, presume that the following scenarios are given:
        Fast_Down_01
        Slow_Down_01
        Fast_Down_02
        Slow_Down_02
        Fast_Down_04
        Slow_Down_04
        Fast_Down_08
        Slow_Down_08

    And only a single track run is specified: SCIT.

    In this example, the third item, skill scores, can be arbitrary.
    Now, suppose that we want to display the result data such that the
    x-axis is for the Down* and there are two plots: one for Fast and one
    for Slow.

    So, we group the scenarios data by some key (discussed later) _into_
    the trackruns dimension.  For this reason, the data dimension being
    grouped into (in this case, trackruns) must originally be singleton.

    *groupinfo* - dict with keys "group", "into", and "by", or None.
        The "group" element states which dimension the grouping will
        occur on.  The "into" element states along which dimension the
        groups will be stacked.  These two elements can have values of
        "scenarios", "skills", or "trackruns".

        The "by" element is rudimentary for now, but it controls the key
        value function used for grouping.  The key function is applied to
        the list of default labels for the dimension stated for "group".
        The unique set of keys generated by the function on these labels
        become the new default labels for the "into" dimension.

        Currently, the key function is hard-coded to split the label by
        underscores and search for the string given in "by" in the
        resulting list.  It then returns the list's next value.  So, in
        the above example, the new labels for the "trackruns" dimension
        would be "01", "02", "04", and "08".

    *labels* - dict with keys "scenarios", "skills", and "trackruns"
        holding the labels of each dimension.  It is modified in place:
        the "into" entry becomes the group keys and the "group" entry
        becomes the original labels with the "by" variable and its value
        removed.

    *args* - the 3D arrays to regroup.

    Returns *args* untouched (as a tuple) when *groupinfo* is None or when
    no arrays were given; otherwise returns a list with one regrouped
    array per input array.

    Raises ValueError if the "into" dimension is not singleton, if "group"
    and "into" name the same dimension, if the grouping yields no groups,
    or if the truncated labels differ from one group to the next.
    """
    if groupinfo is None:
        return args

    if len(args) == 0:
        return args

    if len(labels[groupinfo['into']]) != 1:
        raise ValueError("Dim %s is not singleton!" % groupinfo['into'])

    if groupinfo['group'] == groupinfo['into']:
        raise ValueError("Can not group %s dimension into itself!"
                         % groupinfo['group'])

    # NOTE(review): Panel was removed in pandas 1.0, so everything below
    # needs an older pandas -- confirm the supported version.
    from pandas import Panel

    grpAxis = dataAxes[groupinfo['group']]
    intoAxis = dataAxes[groupinfo['into']]

    # !!Temporary!! restricted functionality for just trackrun variables
    keyfunc = lambda x: _varval(x, groupinfo['by'])

    g_args = []
    for a in args:
        wp = Panel(a, items=labels['scenarios'],
                   major_axis=labels['skills'],
                   minor_axis=labels['trackruns'])

        grouped = wp.groupby(keyfunc, axis=grpAxis)

        if len(grouped) == 0:
            raise ValueError("Grouping didn't result in anything!")

        intolabs, g_a = zip(*grouped)
        # Get a list of numpy arrays from the list of Panels.  This has to
        # be a real list: np.concatenate() needs a sequence, and map()
        # only returns an iterator on Python 3.
        g_a = np.concatenate([panel.values for panel in g_a],
                             axis=intoAxis)

        g_args.append(g_a)

    # Only update the labels once every array has been processed, because
    # the original labels are needed to build each Panel above.
    labels[groupinfo['into']] = intolabs

    # Do the full set for error-checking purposes
    trunclabs = None
    for intolab in intolabs:
        # TODO: Generalize this!
        # Take some original labels and remove the variable and its value
        # that were used to make *intolabs*
        tmp = ['_'.join(_remove_varval(lab.split('_'), groupinfo['by']))
               for lab in grouped.groups[intolab]]

        if trunclabs is not None:
            if tmp != trunclabs:
                raise ValueError("The labels do not match! %s\n%s"
                                 % (trunclabs, tmp))
        else:
            trunclabs = tmp

    labels[groupinfo['group']] = trunclabs

    return g_args
def Regroup(groupinfo, labels, *args):
    """
    Modify the 3D numpy arrays in *args so that data is grouped according
    to user specifications.

    For example, presume that the following scenarios are given:
        Fast_Down_01
        Slow_Down_01
        Fast_Down_02
        Slow_Down_02
        Fast_Down_04
        Slow_Down_04
        Fast_Down_08
        Slow_Down_08

    And only a single track run is specified: SCIT.

    In this example, the third item, skill scores, can be arbitrary.
    Now, suppose that we want to display the result data such that the
    x-axis is for the Down* and there are two plots: one for Fast and one
    for Slow.

    So, we group the scenarios data by some key (discussed later) _into_
    the trackruns dimension.  For this reason, the data dimension being
    grouped into (in this case, trackruns) must originally be singleton.

    *groupinfo* - dict with keys "group", "into", and "by", or None.
        The "group" element states which dimension the grouping will
        occur on.  The "into" element states along which dimension the
        groups will be stacked.  These two elements can have values of
        "scenarios", "skills", or "trackruns".

        The "by" element is rudimentary for now, but it controls the key
        value function used for grouping.  The key function is applied to
        the list of default labels for the dimension stated for "group".
        The unique set of keys generated by the function on these labels
        become the new default labels for the "into" dimension.

        Currently, the key function is hard-coded to split the label by
        underscores and search for the string given in "by" in the
        resulting list.  It then returns the list's next value.  So, in
        the above example, the new labels for the "trackruns" dimension
        would be "01", "02", "04", and "08".

    *labels* - dict with keys "scenarios", "skills", and "trackruns"
        holding the labels of each dimension.  It is modified in place:
        the "into" entry becomes the group keys and the "group" entry
        becomes the original labels with the "by" variable and its value
        removed.

    *args* - the 3D arrays to regroup.

    Returns *args* untouched (as a tuple) when *groupinfo* is None or when
    no arrays were given; otherwise returns a list with one regrouped
    array per input array.

    Raises ValueError if the "into" dimension is not singleton, if "group"
    and "into" name the same dimension, if the grouping yields no groups,
    or if the truncated labels differ from one group to the next.
    """
    if groupinfo is None:
        return args

    if len(args) == 0:
        return args

    if len(labels[groupinfo['into']]) != 1:
        raise ValueError("Dim %s is not singleton!" % groupinfo['into'])

    if groupinfo['group'] == groupinfo['into']:
        raise ValueError("Can not group %s dimension into itself!"
                         % groupinfo['group'])

    # NOTE(review): Panel was removed in pandas 1.0, so everything below
    # needs an older pandas -- confirm the supported version.
    from pandas import Panel

    grpAxis = dataAxes[groupinfo['group']]
    intoAxis = dataAxes[groupinfo['into']]

    # !!Temporary!! restricted functionality for just trackrun variables
    keyfunc = lambda x: _varval(x, groupinfo['by'])

    g_args = []
    for a in args:
        wp = Panel(a, items=labels['scenarios'],
                   major_axis=labels['skills'],
                   minor_axis=labels['trackruns'])

        grouped = wp.groupby(keyfunc, axis=grpAxis)

        if len(grouped) == 0:
            raise ValueError("Grouping didn't result in anything!")

        intolabs, g_a = zip(*grouped)
        # Get a list of numpy arrays from the list of Panels.  This has to
        # be a real list: np.concatenate() needs a sequence, and map()
        # only returns an iterator on Python 3.
        g_a = np.concatenate([panel.values for panel in g_a],
                             axis=intoAxis)

        g_args.append(g_a)

    # Only update the labels once every array has been processed, because
    # the original labels are needed to build each Panel above.
    labels[groupinfo['into']] = intolabs

    # Do the full set for error-checking purposes
    trunclabs = None
    for intolab in intolabs:
        # TODO: Generalize this!
        # Take some original labels and remove the variable and its value
        # that were used to make *intolabs*
        tmp = ['_'.join(_remove_varval(lab.split('_'), groupinfo['by']))
               for lab in grouped.groups[intolab]]

        if trunclabs is not None:
            if tmp != trunclabs:
                raise ValueError("The labels do not match! %s\n%s"
                                 % (trunclabs, tmp))
        else:
            trunclabs = tmp

    labels[groupinfo['group']] = trunclabs

    return g_args