Esempio n. 1
0
 def group_attrib(self):
     '''
     Gather the attributes of all groups this series belongs to.

     Every group in ``self.dataset.groups`` for which ``self in group``
     holds contributes its ``attrib``. The collected values are merged
     by ``concat_namedtuples`` and the merged result is returned.
     When no group contains the series, None is returned.
     '''
     member_attribs = []
     for group in self.dataset.groups:
         if self in group:
             member_attribs.append(group.attrib)
     if not member_attribs:
         return None
     return concat_namedtuples(*member_attribs)
Esempio n. 2
0
def test_concat_namedtuples():
    """Concatenate several namedtuples and check the merged result.

    The letters 'A'..'Z' are paired with the numbers 0..25 and cut into
    consecutive slices; each slice becomes one namedtuple instance. The
    result of ``concat_namedtuples`` must be a tuple that exposes the
    first field ``A`` with value 0.
    """
    letters = [chr(code) for code in range(65, 91)]
    numbers = list(range(26))
    bounds = [0, 4, 5, 8, 14, 22, 25]
    parts = []
    # Walk over adjacent boundary pairs instead of indexing by position
    for lo, hi in zip(bounds, bounds[1:]):
        part_type = namedtuple_factory('Test', letters[lo:hi])
        parts.append(part_type(*numbers[lo:hi]))
    merged = concat_namedtuples(*parts)
    assert isinstance(merged, tuple)
    assert merged.A == 0
Esempio n. 3
0
    def iter_pd_series(self, iter_series, dim_at_obs, dtype,
                       attributes, reverse_obs, fromfreq, parse_time):
        '''
        Generate pandas Series from an iterable of series objects.

        For every item of ``iter_series`` the observations returned by
        ``series.obs(...)`` are split into an index column, a value
        column and an attribute column and converted to pandas objects.

        Args:
            iter_series: iterable of series objects. Each one must offer
                ``key`` (a namedtuple), ``attrib`` and ``obs()``; when
                attributes are requested ``group_attrib`` and
                ``dataset.attrib`` are read as well.
            dim_at_obs(str): name given to the index. 'TIME_PERIOD'
                selects a period index and 'TIME' a datetime index,
                both only if ``parse_time`` is true.
            dtype: numpy dtype for the values; a false value means that
                no value series is produced.
            attributes(str): any combination of the flags 'o'
                (observation), 's' (series), 'g' (group) and
                'd' (dataset); a false value means that no attribute
                series is produced.
            reverse_obs: handed through to ``series.obs``.
            fromfreq(bool): if true and a frequency is found, build the
                index from the first observation and that frequency
                instead of parsing every observation.
            parse_time(bool): if false the index is a plain
                ``PD.Index`` of the raw dimension values.

        Yields:
            Per series either ``(value_series, attrib_series)``, the
            value series alone or the attribute series alone, depending
            on which of ``dtype`` and ``attributes`` are true.

        Raises:
            ValueError: if neither ``dtype`` nor ``attributes`` is true.
        '''
        with_obs_attr = 'o' in attributes
        for series in iter_series:
            # Reject a useless request before doing any work for the series
            if not (dtype or attributes):
                raise ValueError(
                    "At least one of 'dtype' or 'attributes' args must be True.")

            # Transpose the observations into columns:
            # index, values and attributes
            obs_zip = list(zip(*series.obs(with_values=dtype,
                                           with_attributes=with_obs_attr,
                                           reverse_obs=reverse_obs)))
            if obs_zip:
                obs_dim = obs_zip[0]
                n_obs = len(obs_dim)

                # Look for a frequency, first in the key, then in the
                # series attributes (which may be None)
                key_fields = series.key._fields
                attr_fields = series.attrib._fields if series.attrib else ()
                if 'FREQ' in key_fields:
                    f = series.key.FREQ
                elif 'FREQUENCY' in attr_fields:
                    f = series.attrib.FREQUENCY
                elif 'FREQUENCY' in key_fields:
                    f = series.key.FREQUENCY
                elif 'FREQ' in attr_fields:
                    f = series.attrib.FREQ
                else:
                    f = None

                # Generate the index
                if parse_time and dim_at_obs == 'TIME_PERIOD':
                    # pandas cannot parse half-yearly labels such as
                    # '2010-S1'. Map them to the quarter in which the
                    # half-year starts and use a two-quarter frequency.
                    if f == 'H':
                        f = '2Q'
                        obs_dim = ['Q'.join((od[:-2],
                                             '1' if od[-1] == '1' else '3'))
                                   for od in obs_dim]
                    if fromfreq and f:
                        # Build the index from the first value and the freq
                        series_index = PD.period_range(
                            start=PD.Period(obs_dim[0], freq=f),
                            periods=n_obs, freq=f, name=dim_at_obs)
                    else:
                        # No freq, or we must not use it: parse every value
                        series_index = PD.PeriodIndex(
                            [PD.Period(d, freq=f) for d in obs_dim],
                            name=dim_at_obs)
                elif parse_time and dim_at_obs == 'TIME':
                    # Let pandas parse the date strings. PD.datetime was an
                    # alias of datetime.datetime, which cannot be called
                    # with a string, and it no longer exists in pandas 2.
                    if fromfreq and f:
                        series_index = PD.date_range(
                            start=obs_dim[0], periods=n_obs, freq=f,
                            name=dim_at_obs)
                    else:
                        series_index = PD.DatetimeIndex(
                            list(obs_dim), name=dim_at_obs)
                else:
                    # Not a datetime or period index, or don't parse it
                    series_index = PD.Index(obs_dim, name=dim_at_obs)

                if dtype:
                    value_series = PD.Series(
                        NP.array(obs_zip[1], dtype=dtype),
                        index=series_index, name=series.key)

                if attributes:
                    # Assemble attributes of series, group and dataset
                    # as far as they are requested and non-empty
                    gen_attrib = [attr
                                  for flag, attr in (
                                      ('s', series.attrib),
                                      ('g', series.group_attrib),
                                      ('d', series.dataset.attrib))
                                  if (flag in attributes) and attr]
                    if gen_attrib:
                        gen_attrib = concat_namedtuples(*gen_attrib)
                    else:
                        gen_attrib = None

                    if with_obs_attr:
                        obs_attrib = obs_zip[2]
                        if gen_attrib:
                            # Append the general attributes to each
                            # observation's own attributes
                            attrib_values = [
                                concat_namedtuples(a, gen_attrib,
                                                   name='Attrib')
                                for a in obs_attrib]
                        else:
                            # Simply take the obs attributes
                            attrib_values = obs_attrib
                    else:
                        # Same general attribute set (possibly None)
                        # for each observation
                        attrib_values = [gen_attrib] * n_obs

                    attrib_series = PD.Series(
                        attrib_values, index=series_index,
                        dtype='object', name=series.key)
            else:
                # There are no observations. Generate empty Series with
                # explicit dtypes so that the result does not depend on
                # the default dtype of the installed pandas version.
                if dtype:
                    value_series = PD.Series(dtype=dtype, name=series.key)
                if attributes:
                    attrib_series = PD.Series(dtype='object',
                                              name=series.key)

            # decide what to yield
            if dtype and attributes:
                yield value_series, attrib_series
            elif dtype:
                yield value_series
            else:
                yield attrib_series
Esempio n. 4
0
    def iter_pd_series(self, iter_series, dim_at_obs, dtype, attributes, reverse_obs, fromfreq, parse_time):
        """
        Generate pandas Series from an iterable of series objects.

        For every item of ``iter_series`` the observations returned by ``series.obs(dtype, attributes, reverse_obs)``
        are split into an index column, a value column and an attribute column and converted to pandas objects.

        Args:
            iter_series: iterable of series objects. Each one must offer ``key`` (a namedtuple) and ``obs()``;
                when attributes are requested ``attrib``, ``group_attrib`` and ``dataset.attrib`` are read as well.
            dim_at_obs(str): name given to the index. 'TIME_PERIOD' and 'TIME' select time parsing,
                both only if ``parse_time`` is true.
            dtype: numpy dtype for the values; a false value means that no value series is produced.
            attributes(str): any combination of the flags 'o' (observation), 's' (series), 'g' (group)
                and 'd' (dataset); a false value means that no attribute series is produced.
            reverse_obs: handed through to ``series.obs``.
            fromfreq(bool): if true and the key has a FREQ field, build the index from the first
                observation and that frequency instead of parsing every observation.
            parse_time(bool): if false the index is a plain ``PD.Index`` of the raw dimension values.

        Yields:
            Per series either ``(value_series, attrib_series)``, the value series alone or the attribute
            series alone, depending on which of ``dtype`` and ``attributes`` are true.

        Raises:
            ValueError: if neither ``dtype`` nor ``attributes`` is true.
        """
        for series in iter_series:
            # Reject a useless request before doing any work for the series
            if not (dtype or attributes):
                raise ValueError("At least one of 'dtype' or 'attributes' args must be True.")

            # Transpose the observations into columns: index, values and attributes.
            # Materialising the columns lets us detect a series without observations;
            # calling next() on an exhausted iterator would raise StopIteration inside
            # this generator, which Python turns into a RuntimeError.
            obs_columns = list(zip(*series.obs(dtype, attributes, reverse_obs)))

            if not obs_columns:
                # No observations: yield empty Series with explicit dtypes
                if dtype:
                    value_series = PD.Series(dtype=dtype, name=series.key)
                if attributes:
                    attrib_series = PD.Series(dtype="object", name=series.key)
            else:
                obs_dim = obs_columns[0]
                n_obs = len(obs_dim)
                has_freq = "FREQ" in series.key._fields
                f = series.key.FREQ if has_freq else None

                # Generate the index
                if parse_time and dim_at_obs == "TIME_PERIOD":
                    if fromfreq and has_freq:
                        # Build the index from the first value and the frequency
                        od0 = obs_dim[0]
                        if f == "Q":
                            year, subdiv = int(od0[:4]), int(od0[-1])
                            start_date = PD.Timestamp(year, (subdiv - 1) * 3 + 1, 1)
                            series_index = PD.period_range(start=start_date, periods=n_obs, freq="Q", name=dim_at_obs)
                        elif "S" in od0:
                            # pandas cannot represent semesters as periods. So we use date_range.
                            # '6MS' (month start) keeps the dates on the first day of each semester,
                            # matching the index built below when fromfreq is false.
                            year, subdiv = int(od0[:4]), int(od0[-1])
                            start_date = PD.Timestamp(year, (subdiv - 1) * 6 + 1, 1)
                            series_index = PD.date_range(start=start_date, periods=n_obs, freq="6MS", name=dim_at_obs)
                        else:
                            series_index = PD.period_range(start=od0, periods=n_obs, freq=f, name=dim_at_obs)
                    elif has_freq:
                        # fromfreq is False. So generate the index from all the strings
                        if f == "Q":
                            series_index = PD.Index(
                                [PD.Period(year=int(d[:4]), quarter=int(d[-1]), freq="Q") for d in obs_dim],
                                name=dim_at_obs,
                            )
                        elif f == "H":
                            # First day of the semester as a timestamp
                            series_index = PD.Index(
                                [PD.Timestamp(int(d[:4]), (int(d[-1]) - 1) * 6 + 1, 1) for d in obs_dim],
                                name=dim_at_obs,
                            )
                        else:  # other freq such as 'A' or 'M'
                            series_index = PD.PeriodIndex(obs_dim, freq=f, name=dim_at_obs)
                    else:
                        # No FREQ in the key: let pandas infer the period frequency from the
                        # values rather than leaving the index undefined (or stale from the
                        # previous series).
                        series_index = PD.PeriodIndex([PD.Period(d) for d in obs_dim], name=dim_at_obs)
                elif parse_time and dim_at_obs == "TIME":
                    if fromfreq and has_freq:
                        series_index = PD.date_range(start=obs_dim[0], periods=n_obs, freq=f, name=dim_at_obs)
                    else:
                        series_index = PD.DatetimeIndex(obs_dim, name=dim_at_obs)
                else:
                    # Not a datetime or period index or don't parse it
                    series_index = PD.Index(obs_dim, name=dim_at_obs)

                if dtype:
                    value_series = PD.Series(
                        NP.array(obs_columns[1], dtype=dtype), index=series_index, name=series.key
                    )

                if attributes:
                    # Assemble attributes of series, group and dataset as far as requested and non-empty
                    gen_attrib = [
                        attr
                        for flag, attr in (
                            ("s", series.attrib),
                            ("g", series.group_attrib),
                            ("d", series.dataset.attrib),
                        )
                        if (flag in attributes) and attr
                    ]
                    if gen_attrib:
                        gen_attrib = concat_namedtuples(*gen_attrib)
                    else:
                        gen_attrib = None

                    if "o" in attributes:
                        obs_attrib = obs_columns[2]
                        if gen_attrib:
                            # Append the general attributes to each observation's own attributes
                            attrib_values = [concat_namedtuples(a, gen_attrib, name="Attrib") for a in obs_attrib]
                        else:
                            # Simply take the obs attributes
                            attrib_values = obs_attrib
                    else:
                        # Same general attribute set (possibly None) for each observation
                        attrib_values = [gen_attrib] * n_obs

                    attrib_series = PD.Series(attrib_values, index=series_index, dtype="object", name=series.key)

            # decide what to yield
            if dtype and attributes:
                yield value_series, attrib_series
            elif dtype:
                yield value_series
            else:
                yield attrib_series
Esempio n. 5
0
    def iter_pd_series(self, iter_series, dim_at_obs, dtype, attributes,
                       reverse_obs, fromfreq, parse_time):
        '''
        Generate pandas Series from an iterable of series objects.

        For every item of ``iter_series`` the observations returned by
        ``series.obs(dtype, attributes, reverse_obs)`` are split into an
        index column, a value column and an attribute column and
        converted to pandas objects.

        Args:
            iter_series: iterable of series objects. Each one must offer
                ``key`` (a namedtuple) and ``obs()``; when attributes
                are requested ``attrib``, ``group_attrib`` and
                ``dataset.attrib`` are read as well.
            dim_at_obs(str): name given to the index. 'TIME_PERIOD' and
                'TIME' select time parsing, both only if ``parse_time``
                is true.
            dtype: numpy dtype for the values; a false value means that
                no value series is produced.
            attributes(str): any combination of the flags 'o'
                (observation), 's' (series), 'g' (group) and
                'd' (dataset); a false value means that no attribute
                series is produced.
            reverse_obs: handed through to ``series.obs``.
            fromfreq(bool): if true and the key has a FREQ field, build
                the index from the first observation and that frequency
                instead of parsing every observation.
            parse_time(bool): if false the index is a plain
                ``PD.Index`` of the raw dimension values.

        Yields:
            Per series either ``(value_series, attrib_series)``, the
            value series alone or the attribute series alone, depending
            on which of ``dtype`` and ``attributes`` are true.

        Raises:
            ValueError: if neither ``dtype`` nor ``attributes`` is true.
        '''
        for series in iter_series:
            # Reject a useless request before doing any work for the series
            if not (dtype or attributes):
                raise ValueError(
                    "At least one of 'dtype' or 'attributes' args must be True."
                )

            # Transpose the observations into columns: index, values and
            # attributes. Materialising the columns lets us detect a series
            # without observations; calling next() on an exhausted iterator
            # would raise StopIteration inside this generator, which Python
            # turns into a RuntimeError.
            obs_columns = list(
                zip(*series.obs(dtype, attributes, reverse_obs)))

            if not obs_columns:
                # No observations: yield empty Series with explicit dtypes
                if dtype:
                    value_series = PD.Series(dtype=dtype, name=series.key)
                if attributes:
                    attrib_series = PD.Series(dtype='object',
                                              name=series.key)
            else:
                obs_dim = obs_columns[0]
                n_obs = len(obs_dim)
                has_freq = 'FREQ' in series.key._fields
                f = series.key.FREQ if has_freq else None

                # Generate the index
                if parse_time and dim_at_obs == 'TIME_PERIOD':
                    if fromfreq and has_freq:
                        # Build the index from the first value and the
                        # frequency
                        od0 = obs_dim[0]
                        if f == 'Q':
                            year, subdiv = int(od0[:4]), int(od0[-1])
                            start_date = PD.Timestamp(
                                year, (subdiv - 1) * 3 + 1, 1)
                            series_index = PD.period_range(start=start_date,
                                                           periods=n_obs,
                                                           freq='Q',
                                                           name=dim_at_obs)
                        elif 'S' in od0:
                            # pandas cannot represent semesters as periods.
                            # So we use date_range. '6MS' (month start)
                            # keeps the dates on the first day of each
                            # semester, matching the index built below
                            # when fromfreq is false.
                            year, subdiv = int(od0[:4]), int(od0[-1])
                            start_date = PD.Timestamp(
                                year, (subdiv - 1) * 6 + 1, 1)
                            series_index = PD.date_range(start=start_date,
                                                         periods=n_obs,
                                                         freq='6MS',
                                                         name=dim_at_obs)
                        else:
                            series_index = PD.period_range(start=od0,
                                                           periods=n_obs,
                                                           freq=f,
                                                           name=dim_at_obs)
                    elif has_freq:
                        # fromfreq is False. So generate the index from
                        # all the strings
                        if f == 'Q':
                            series_index = PD.Index([
                                PD.Period(year=int(d[:4]),
                                          quarter=int(d[-1]),
                                          freq='Q') for d in obs_dim
                            ],
                                                    name=dim_at_obs)
                        elif f == 'H':
                            # First day of the semester as a timestamp
                            series_index = PD.Index([
                                PD.Timestamp(int(d[:4]),
                                             (int(d[-1]) - 1) * 6 + 1, 1)
                                for d in obs_dim
                            ],
                                                    name=dim_at_obs)
                        else:  # other freq such as 'A' or 'M'
                            series_index = PD.PeriodIndex(obs_dim,
                                                          freq=f,
                                                          name=dim_at_obs)
                    else:
                        # No FREQ in the key: let pandas infer the period
                        # frequency from the values rather than leaving
                        # the index undefined (or stale from the previous
                        # series).
                        series_index = PD.PeriodIndex(
                            [PD.Period(d) for d in obs_dim],
                            name=dim_at_obs)
                elif parse_time and dim_at_obs == 'TIME':
                    if fromfreq and has_freq:
                        series_index = PD.date_range(start=obs_dim[0],
                                                     periods=n_obs,
                                                     freq=f,
                                                     name=dim_at_obs)
                    else:
                        series_index = PD.DatetimeIndex(obs_dim,
                                                        name=dim_at_obs)
                else:
                    # Not a datetime or period index or don't parse it
                    series_index = PD.Index(obs_dim, name=dim_at_obs)

                if dtype:
                    value_series = PD.Series(NP.array(obs_columns[1],
                                                      dtype=dtype),
                                             index=series_index,
                                             name=series.key)

                if attributes:
                    # Assemble attributes of series, group and dataset
                    # as far as requested and non-empty
                    gen_attrib = [
                        attr for flag, attr in (('s', series.attrib),
                                                ('g', series.group_attrib),
                                                ('d', series.dataset.attrib))
                        if (flag in attributes) and attr
                    ]
                    if gen_attrib:
                        gen_attrib = concat_namedtuples(*gen_attrib)
                    else:
                        gen_attrib = None

                    if 'o' in attributes:
                        obs_attrib = obs_columns[2]
                        if gen_attrib:
                            # Append the general attributes to each
                            # observation's own attributes
                            attrib_values = [
                                concat_namedtuples(a,
                                                   gen_attrib,
                                                   name='Attrib')
                                for a in obs_attrib
                            ]
                        else:
                            # Simply take the obs attributes
                            attrib_values = obs_attrib
                    else:
                        # Same general attribute set (possibly None)
                        # for each observation
                        attrib_values = [gen_attrib] * n_obs

                    attrib_series = PD.Series(attrib_values,
                                              index=series_index,
                                              dtype='object',
                                              name=series.key)

            # decide what to yield
            if dtype and attributes:
                yield value_series, attrib_series
            elif dtype:
                yield value_series
            else:
                yield attrib_series