def group_attrib(self):
    """
    Merge the attributes of every group that contains this series.

    Walks ``self.dataset.groups``, keeps the ``attrib`` of each group for
    which ``self in group`` holds, and combines them with
    ``concat_namedtuples``. Returns None when no group yields an attribute
    set.
    """
    matching_attribs = []
    for group in self.dataset.groups:
        if self in group:
            matching_attribs.append(group.attrib)
    if not matching_attribs:
        return None
    return concat_namedtuples(*matching_attribs)
def test_concat_namedtuples():
    """
    Check that concat_namedtuples merges several namedtuples into one.

    Builds six namedtuples whose fields are consecutive slices of the
    letters 'A'..'Y' with the matching integers as values, concatenates
    them, and verifies the result is a tuple whose field ``A`` is 0.
    """
    numbers = list(range(26))
    letters = [chr(65 + n) for n in numbers]
    bounds = [0, 4, 5, 8, 14, 22, 25]
    tuples = []
    # Walk over adjacent (start, stop) pairs of the slice boundaries.
    for start, stop in zip(bounds[:-1], bounds[1:]):
        tuple_type = namedtuple_factory('Test', letters[start:stop])
        tuples.append(tuple_type(*numbers[start:stop]))
    concat1 = concat_namedtuples(*tuples)
    assert isinstance(concat1, tuple)
    assert concat1.A == 0
def iter_pd_series(self, iter_series, dim_at_obs, dtype, attributes,
                   reverse_obs, fromfreq, parse_time):
    """
    Generate pandas Series from the given series objects.

    Parameters:
        iter_series: iterable of series objects. Each must expose ``key``
            (a namedtuple), ``attrib``, ``group_attrib``, ``dataset`` and
            an ``obs()`` method yielding (dimension, value, attributes)
            triples.
        dim_at_obs: name given to the resulting index. With ``parse_time``
            set, 'TIME_PERIOD' produces a period index and 'TIME' a
            datetime index.
        dtype: forwarded to ``series.obs`` as ``with_values`` and used as
            the numpy dtype of the values. If falsy, no value series is
            produced.
        attributes: container of flags; 'o' (observation), 's' (series),
            'g' (group) and 'd' (dataset) select which attributes are
            collected. If falsy, no attribute series is produced.
        reverse_obs: forwarded unchanged to ``series.obs``.
        fromfreq: if true and a frequency was found, build the index from
            the first dimension value and that frequency instead of
            parsing every value.
        parse_time: if false, the dimension values are used as a plain
            index.

    Yields:
        ``(value_series, attrib_series)`` when both ``dtype`` and
        ``attributes`` are truthy, otherwise whichever one was requested.

    Raises:
        ValueError: while iterating, if both ``dtype`` and ``attributes``
            are falsy.
    """
    with_obs_attr = 'o' in attributes
    for series in iter_series:
        # Transpose the observations into the 3 main columns:
        # index values, observation values and observation attributes.
        obs_zip = list(zip(*series.obs(with_values=dtype,
                                       with_attributes=with_obs_attr,
                                       reverse_obs=reverse_obs)))
        # Are there observations at all?
        if obs_zip:
            obs_dim = obs_zip[0]
            obs_values = NP.array(obs_zip[1], dtype=dtype)
            obs_attrib = obs_zip[2]
            n_obs = len(obs_dim)

            # Look up the frequency: key first, then series attributes,
            # trying 'FREQ' before 'FREQUENCY' in the order below.
            if 'FREQ' in series.key._fields:
                f = series.key.FREQ
            elif series.attrib and 'FREQUENCY' in series.attrib._fields:
                f = series.attrib.FREQUENCY
            elif 'FREQUENCY' in series.key._fields:
                f = series.key.FREQUENCY
            elif series.attrib and 'FREQ' in series.attrib._fields:
                f = series.attrib.FREQ
            else:
                f = None

            # Generate the index
            if parse_time and dim_at_obs == 'TIME_PERIOD':
                # Half-yearly values such as '2010-S1' cannot be parsed by
                # pandas. Express them as two-quarter periods starting in
                # Q1 or Q3 instead.
                if f == 'H':
                    f = '2Q'
                    obs_dim = [
                        'Q'.join((od[:-2], '1' if od[-1] == '1' else '3'))
                        for od in obs_dim
                    ]
                if fromfreq and f:
                    # There is a frequency and we must use it: only the
                    # first value is parsed.
                    series_index = PD.period_range(
                        start=PD.Period(obs_dim[0], freq=f),
                        periods=n_obs, freq=f, name=dim_at_obs)
                else:
                    # No frequency, or we must not rely on it:
                    # parse every dimension value.
                    series_index = PD.PeriodIndex(
                        [PD.Period(d, freq=f) for d in obs_dim],
                        name=dim_at_obs)
            elif parse_time and dim_at_obs == 'TIME':
                # Let pandas parse the values. The datetime constructor
                # needs at least year, month and day, so calling it with a
                # single dimension value always raised TypeError.
                if fromfreq and f:
                    series_index = PD.date_range(
                        start=obs_dim[0], periods=n_obs, freq=f,
                        name=dim_at_obs)
                else:
                    series_index = PD.DatetimeIndex(
                        list(obs_dim), name=dim_at_obs)
            else:
                # Not a datetime or period index, or parsing not wanted.
                series_index = PD.Index(obs_dim, name=dim_at_obs)

            if dtype:
                value_series = PD.Series(
                    obs_values, index=series_index, name=series.key)

            if attributes:
                # Assemble the requested series, group and dataset
                # attributes, skipping any that are empty.
                gen_attrib = [
                    attr for flag, attr in (
                        ('s', series.attrib),
                        ('g', series.group_attrib),
                        ('d', series.dataset.attrib))
                    if (flag in attributes) and attr
                ]
                if gen_attrib:
                    gen_attrib = concat_namedtuples(*gen_attrib)
                else:
                    gen_attrib = None

                if 'o' in attributes:
                    if gen_attrib:
                        # Prepend each observation's attributes to the
                        # general ones.
                        attrib_iter = (
                            concat_namedtuples(a, gen_attrib, name='Attrib')
                            for a in obs_attrib)
                    else:
                        # Simply take the obs attributes.
                        attrib_iter = obs_attrib
                else:
                    # Repeat the constant general attribute set (which may
                    # be None) once per observation.
                    attrib_iter = (gen_attrib for _ in obs_attrib)
                attrib_series = PD.Series(
                    attrib_iter, index=series_index, dtype='object',
                    name=series.key)
        else:
            # There are no observations, so generate empty Series.
            if dtype:
                value_series = PD.Series(name=series.key)
            if attributes:
                attrib_series = PD.Series(name=series.key)

        # Decide what to yield.
        if dtype and attributes:
            yield value_series, attrib_series
        elif dtype:
            yield value_series
        elif attributes:
            yield attrib_series
        else:
            raise ValueError(
                "At least one of 'dtype' or 'attributes' args must be True.")
def iter_pd_series(self, iter_series, dim_at_obs, dtype, attributes, reverse_obs, fromfreq, parse_time):
    """
    Generate pandas Series from the given series objects.

    Parameters:
        iter_series: iterable of series objects. Each must expose ``key``
            (a namedtuple), ``attrib``, ``group_attrib``, ``dataset`` and
            an ``obs()`` method whose items are zipped into dimension,
            value and (when requested) attribute columns.
        dim_at_obs: name given to the resulting index. With ``parse_time``
            set, "TIME_PERIOD" and "TIME" trigger period / datetime
            parsing.
        dtype: forwarded to ``series.obs`` and used as the numpy dtype of
            the values. If falsy, no value series is produced.
        attributes: container of flags; "o" (observation), "s" (series),
            "g" (group) and "d" (dataset) select which attributes are
            collected. If falsy, no attribute series is produced.
        reverse_obs: forwarded unchanged to ``series.obs``.
        fromfreq: if true and the key has a FREQ field, build the index
            from the first dimension value and that frequency.
        parse_time: if false, the dimension values are used as a plain
            index.

    Yields:
        ``(value_series, attrib_series)`` when both ``dtype`` and
        ``attributes`` are truthy, otherwise whichever one was requested.
        A series without observations yields empty Series.

    Raises:
        ValueError: while iterating, if both ``dtype`` and ``attributes``
            are falsy.
    """
    for series in iter_series:
        # Transpose the observations into the main columns: index, values
        # and attributes. The columns are materialised so that a series
        # without observations can be detected; calling next() on an empty
        # iterator here would surface as RuntimeError inside a generator.
        obs_columns = list(zip(*series.obs(dtype, attributes, reverse_obs)))
        if obs_columns:
            obs_dim = obs_columns[0]
            n_obs = len(obs_dim)
            obs_values = NP.array(obs_columns[1], dtype=dtype)
            if attributes:
                obs_attrib = obs_columns[2]

            # Generate the index
            if parse_time and dim_at_obs == "TIME_PERIOD":
                if fromfreq and "FREQ" in series.key._fields:
                    # Build the index from the first value and FREQ only.
                    f = series.key.FREQ
                    od0 = obs_dim[0]
                    if f == "Q":
                        year, subdiv = int(od0[:4]), int(od0[-1])
                        start_date = PD.Timestamp(year, (subdiv - 1) * 3 + 1, 1)
                        series_index = PD.period_range(
                            start=start_date, periods=n_obs, freq="Q", name=dim_at_obs
                        )
                    elif "S" in od0:
                        # pandas cannot represent semesters as periods. So
                        # we use date_range.
                        # NOTE(review): "6M" is a month-end frequency while
                        # the fromfreq=False branch below produces the
                        # first day of each semester - confirm intent.
                        year, subdiv = int(od0[:4]), int(od0[-1])
                        start_date = PD.Timestamp(year, (subdiv - 1) * 6 + 1, 1)
                        series_index = PD.date_range(
                            start=start_date, periods=n_obs, freq="6M", name=dim_at_obs
                        )
                    else:
                        series_index = PD.period_range(
                            start=od0, periods=n_obs, freq=f, name=dim_at_obs
                        )
                elif "FREQ" in series.key._fields:
                    # fromfreq is False. So generate the index from all
                    # the strings.
                    f = series.key.FREQ
                    if f == "Q":
                        # Year and quarter are read from each string.
                        series_index = PD.Index(
                            [
                                PD.Period(year=int(d[:4]), quarter=int(d[-1]), freq="Q")
                                for d in obs_dim
                            ],
                            name=dim_at_obs,
                        )
                    elif f == "H":
                        # Semesters become the timestamp of their first day.
                        series_index = PD.Index(
                            [
                                PD.Timestamp(int(d[:4]), (int(d[-1]) - 1) * 6 + 1, 1)
                                for d in obs_dim
                            ],
                            name=dim_at_obs,
                        )
                    else:
                        # other freq such as 'A' or 'M'
                        series_index = PD.PeriodIndex(obs_dim, freq=f, name=dim_at_obs)
                else:
                    # No FREQ field in the key: let pandas infer the
                    # frequency of each value. Without this branch
                    # series_index was left unassigned (or stale from the
                    # previous series).
                    series_index = PD.PeriodIndex(
                        [PD.Period(d) for d in obs_dim], name=dim_at_obs
                    )
            elif parse_time and dim_at_obs == "TIME":
                if fromfreq and "FREQ" in series.key._fields:
                    f = series.key.FREQ
                    series_index = PD.date_range(
                        start=obs_dim[0], periods=n_obs, freq=f, name=dim_at_obs
                    )
                else:
                    series_index = PD.DatetimeIndex(obs_dim, name=dim_at_obs)
            else:
                # Not a datetime or period index or don't parse it
                series_index = PD.Index(obs_dim, name=dim_at_obs)

            if dtype:
                value_series = PD.Series(obs_values, index=series_index, name=series.key)

            if attributes:
                # Assemble the requested series, group and dataset
                # attributes, skipping any that are empty.
                gen_attrib = [
                    attr
                    for flag, attr in (
                        ("s", series.attrib),
                        ("g", series.group_attrib),
                        ("d", series.dataset.attrib),
                    )
                    if (flag in attributes) and attr
                ]
                if gen_attrib:
                    gen_attrib = concat_namedtuples(*gen_attrib)
                else:
                    gen_attrib = None

                if "o" in attributes:
                    if gen_attrib:
                        # concat with general attributes
                        attrib_iter = (
                            concat_namedtuples(a, gen_attrib, name="Attrib")
                            for a in obs_attrib
                        )
                    else:
                        # Simply take the obs attributes
                        attrib_iter = obs_attrib
                else:
                    # Repeat the constant general attribute set (which may
                    # be None) once per observation.
                    attrib_iter = (gen_attrib for _ in obs_attrib)
                attrib_series = PD.Series(
                    attrib_iter, index=series_index, dtype="object", name=series.key
                )
        else:
            # There are no observations, so generate empty Series.
            if dtype:
                value_series = PD.Series(dtype=dtype, name=series.key)
            if attributes:
                attrib_series = PD.Series(dtype="object", name=series.key)

        # decide what to yield
        if dtype and attributes:
            yield value_series, attrib_series
        elif dtype:
            yield value_series
        elif attributes:
            yield attrib_series
        else:
            raise ValueError("At least one of 'dtype' or 'attributes' args must be True.")
def iter_pd_series(self, iter_series, dim_at_obs, dtype, attributes,
                   reverse_obs, fromfreq, parse_time):
    """
    Generate pandas Series from the given series objects.

    Parameters:
        iter_series: iterable of series objects. Each must expose ``key``
            (a namedtuple), ``attrib``, ``group_attrib``, ``dataset`` and
            an ``obs()`` method whose items are zipped into dimension,
            value and (when requested) attribute columns.
        dim_at_obs: name given to the resulting index. With ``parse_time``
            set, 'TIME_PERIOD' and 'TIME' trigger period / datetime
            parsing.
        dtype: forwarded to ``series.obs`` and used as the numpy dtype of
            the values. If falsy, no value series is produced.
        attributes: container of flags; 'o' (observation), 's' (series),
            'g' (group) and 'd' (dataset) select which attributes are
            collected. If falsy, no attribute series is produced.
        reverse_obs: forwarded unchanged to ``series.obs``.
        fromfreq: if true and the key has a FREQ field, build the index
            from the first dimension value and that frequency.
        parse_time: if false, the dimension values are used as a plain
            index.

    Yields:
        ``(value_series, attrib_series)`` when both ``dtype`` and
        ``attributes`` are truthy, otherwise whichever one was requested.
        A series without observations yields empty Series.

    Raises:
        ValueError: while iterating, if both ``dtype`` and ``attributes``
            are falsy.
    """
    for series in iter_series:
        # Transpose the observations into the main columns: index, values
        # and attributes. The columns are materialised so that a series
        # without observations can be detected; calling next() on an empty
        # iterator here would surface as RuntimeError inside a generator.
        obs_columns = list(zip(*series.obs(dtype, attributes, reverse_obs)))
        if obs_columns:
            obs_dim = obs_columns[0]
            n_obs = len(obs_dim)
            obs_values = NP.array(obs_columns[1], dtype=dtype)
            if attributes:
                obs_attrib = obs_columns[2]

            # Generate the index
            if parse_time and dim_at_obs == 'TIME_PERIOD':
                if fromfreq and 'FREQ' in series.key._fields:
                    # Build the index from the first value and FREQ only.
                    f = series.key.FREQ
                    od0 = obs_dim[0]
                    if f == 'Q':
                        year, subdiv = int(od0[:4]), int(od0[-1])
                        start_date = PD.Timestamp(
                            year, (subdiv - 1) * 3 + 1, 1)
                        series_index = PD.period_range(
                            start=start_date, periods=n_obs, freq='Q',
                            name=dim_at_obs)
                    elif 'S' in od0:
                        # pandas cannot represent semesters as periods. So
                        # we use date_range.
                        # NOTE(review): '6M' is a month-end frequency while
                        # the fromfreq=False branch below produces the
                        # first day of each semester - confirm intent.
                        year, subdiv = int(od0[:4]), int(od0[-1])
                        start_date = PD.Timestamp(
                            year, (subdiv - 1) * 6 + 1, 1)
                        series_index = PD.date_range(
                            start=start_date, periods=n_obs, freq='6M',
                            name=dim_at_obs)
                    else:
                        series_index = PD.period_range(
                            start=od0, periods=n_obs, freq=f,
                            name=dim_at_obs)
                elif 'FREQ' in series.key._fields:
                    # fromfreq is False. So generate the index from all
                    # the strings.
                    f = series.key.FREQ
                    if f == 'Q':
                        # Year and quarter are read from each string.
                        series_index = PD.Index(
                            [PD.Period(year=int(d[:4]), quarter=int(d[-1]),
                                       freq='Q')
                             for d in obs_dim],
                            name=dim_at_obs)
                    elif f == 'H':
                        # Semesters become the timestamp of their first day.
                        series_index = PD.Index(
                            [PD.Timestamp(int(d[:4]),
                                          (int(d[-1]) - 1) * 6 + 1, 1)
                             for d in obs_dim],
                            name=dim_at_obs)
                    else:
                        # other freq such as 'A' or 'M'
                        series_index = PD.PeriodIndex(
                            obs_dim, freq=f, name=dim_at_obs)
                else:
                    # No FREQ field in the key: let pandas infer the
                    # frequency of each value. Without this branch
                    # series_index was left unassigned (or stale from the
                    # previous series).
                    series_index = PD.PeriodIndex(
                        [PD.Period(d) for d in obs_dim], name=dim_at_obs)
            elif parse_time and dim_at_obs == 'TIME':
                if fromfreq and 'FREQ' in series.key._fields:
                    f = series.key.FREQ
                    series_index = PD.date_range(
                        start=obs_dim[0], periods=n_obs, freq=f,
                        name=dim_at_obs)
                else:
                    series_index = PD.DatetimeIndex(obs_dim, name=dim_at_obs)
            else:
                # Not a datetime or period index or don't parse it
                series_index = PD.Index(obs_dim, name=dim_at_obs)

            if dtype:
                value_series = PD.Series(
                    obs_values, index=series_index, name=series.key)

            if attributes:
                # Assemble the requested series, group and dataset
                # attributes, skipping any that are empty.
                gen_attrib = [
                    attr for flag, attr in (
                        ('s', series.attrib),
                        ('g', series.group_attrib),
                        ('d', series.dataset.attrib))
                    if (flag in attributes) and attr
                ]
                if gen_attrib:
                    gen_attrib = concat_namedtuples(*gen_attrib)
                else:
                    gen_attrib = None

                if 'o' in attributes:
                    if gen_attrib:
                        # concat with general attributes
                        attrib_iter = (
                            concat_namedtuples(a, gen_attrib, name='Attrib')
                            for a in obs_attrib)
                    else:
                        # Simply take the obs attributes
                        attrib_iter = obs_attrib
                else:
                    # Repeat the constant general attribute set (which may
                    # be None) once per observation.
                    attrib_iter = (gen_attrib for _ in obs_attrib)
                attrib_series = PD.Series(
                    attrib_iter, index=series_index, dtype='object',
                    name=series.key)
        else:
            # There are no observations, so generate empty Series.
            if dtype:
                value_series = PD.Series(dtype=dtype, name=series.key)
            if attributes:
                attrib_series = PD.Series(dtype='object', name=series.key)

        # decide what to yield
        if dtype and attributes:
            yield value_series, attrib_series
        elif dtype:
            yield value_series
        elif attributes:
            yield attrib_series
        else:
            raise ValueError(
                "At least one of 'dtype' or 'attributes' args must be True."
            )