예제 #1
0
    def frequency(self, link, name, kwargs):
        """
        Adds count-based views on a Link definition to the Stack object.

        ``frequency`` is able to compute several aggregates that are based on
        the count of code values in uni- or bivariate Links. This includes
        bases / sample sizes, raw or normalized cell frequencies and code
        summaries like simple and complex nets.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view. The name ``'cbase_gross'``
            makes the underlying Quantity ignore flags
            (``ignore_flags=True``).
        kwargs : dict
        Keyword arguments (specific)
        text : str, optional, default None
            Sets an optional label in the meta component of the view that is
            used when the view is passed into a Quantipy build (e.g. Excel,
            Powerpoint).
        logic : list of int, list of dicts or core.tools.view.logic operation
            If a list is passed this instructs a simple net of the codes given
            as int. Multiple nets can be generated via a list of dicts that
            map names to lists of ints. For complex logical statements,
            expressions are parsed to identify the qualifying rows in the
            data. For example::

                # simple net
                'logic': [1, 2, 3]

                # multiple nets/code groups
                'logic': [{'A': [1, 2]}, {'B': [3, 4]}, {'C': [5, 6]}]

                # code logic
                'logic': has_all([1, 2, 3])

        combine : bool, optional, default False
            If truthy, ``expand`` is reset to None and ``complete`` to False,
            both on the view's kwargs and locally, and any ``'expand'`` entry
            inside the individual ``logic`` definitions is set to None.

        calc : TODO

        calc_only : TODO

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key. None is also returned early (after a
            warning) if grouping raises ``NotImplementedError``.

        .. note:: Net codes take into account if a variable is
                  multi-coded. The net will therefore consider qualifying
                  cases and not the raw sum of the frequencies
                  per category, i.e. no multiple counting of cases.
        """
        view = View(link, name, kwargs=kwargs)
        # Standard and edit parameters are resolved by the View itself.
        axis, condition, rel_to, weights, text = view.get_std_params()
        logic, expand, complete, calc, exclude, rescale = view.get_edit_params(
        )
        # ====================================================================
        # This block of kwargs should be removed:
        # parameter overwriting should be done using the template,
        # NOT in QP core code!
        if kwargs.get('combine', False):
            # 'combine' switches off expansion/completion, both on the stored
            # view kwargs and on the local values used below.
            view._kwargs['expand'], expand = None, None
            view._kwargs['complete'], complete = False, False
            if logic is not None:
                # Strip per-group 'expand' settings as well.
                # NOTE(review): assumes every logic entry offers .keys()
                # (i.e. is dict-like) -- confirm for plain int lists.
                for no, logic_def in enumerate(logic):
                    if 'expand' in logic_def.keys():
                        logic_def['expand'] = None
                        logic[no] = logic_def
                view._kwargs['logic'] = logic
        # ====================================================================
        # The conditional below is a no-op: w is always equal to weights.
        w = weights if weights is not None else None
        # Only the 'cbase_gross' view asks the Quantity to ignore flags.
        ignore = True if name == 'cbase_gross' else False
        q = qp.Quantity(link, w, ignore_flags=ignore)
        if q.type == 'array' and not q.y == '@':
            # Array-type quantities are only handled against the '@' y-axis;
            # any other combination produces no view.
            pass
        else:
            if logic is not None:
                try:
                    q.group(groups=logic,
                            axis=axis,
                            expand=expand,
                            complete=complete)
                # NOTE(review): 'except X, e' is Python 2-only syntax; under
                # Python 3 this must read 'except NotImplementedError as e'.
                except NotImplementedError, e:
                    # Unsupported grouping: warn and skip this view.
                    warnings.warn('NotImplementedError: {}'.format(e))
                    return None
                q.count(axis=None, as_df=False, margin=False)
                # The notation condition is derived from the logical
                # conditions collected while grouping.
                condition = view.spec_condition(link, q.logical_conditions,
                                                expand)
            else:
                # NOTE(review): the snippet is truncated here; the body of
                # this branch (plain counting without logic) is not shown.
예제 #2
0
    def descriptives(self, link, name, kwargs):
        """
        Add a numeric summary statistic of a Link definition to the Stack.

        ``descriptives`` views summarize the distribution of a Link with a
        single statistic (measures of centrality, dispersion and mass).

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict
        Keyword arguments (specific)
        text : str, optional, default None
            Sets an optional label suffix for the meta component of the view
            which will be appended to the statistic name and used when the
            view is passed into a Quantipy build (e.g. Excel, Powerpoint).
        stats : str, default 'mean'
            The measure to compute.
        exclude : list of int
            Codes that will not be considered when calculating the result.
        rescale : dict
            A mapping of {old code: new code}, e.g.::

                {
                 1: 0,
                 2: 25,
                 3: 50,
                 4: 75,
                 5: 100
                }
        drop : bool
            If ``rescale`` provides a new scale definition, ``drop`` will
            remove all codes that are not transformed. Acts as a shorthand
            for manually passing any remaining codes in ``exclude``.
        source : optional
            If truthy, the Quantity is passed through
            ``self._swap_and_rebase`` with this value before aggregating.
            It also enables the view for x variables flagged ``is_multi``.

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key. Nothing is stored when x is flagged
            ``is_multi`` without a ``source``, or for array-type quantities
            whose y is not ``'@'``.
        """
        view = View(link, name, kwargs=kwargs)
        # Guard: an ``is_multi`` x is only processed when a 'source' is given.
        if view._x['is_multi'] and not kwargs.get('source'):
            return

        # NOTE(review): the View is built a second time, as in the original
        # flow -- looks redundant; confirm View() has no side effects
        # before removing.
        view = View(link, name, kwargs=kwargs)
        axis, condition, rel_to, weights, text = view.get_std_params()
        (logic, expand, complete,
         calc, exclude, rescale) = view.get_edit_params()
        stat = kwargs.get('stats', 'mean')
        view._kwargs['calc_only'] = True
        q = qp.Quantity(link, weights)

        source = kwargs.get('source', None)
        if source:
            q = self._swap_and_rebase(q, source)

        # Guard: array-type quantities are only handled against the '@' axis.
        if q.type == 'array' and not q.y == '@':
            return

        if exclude is not None:
            q.exclude(exclude, axis=axis)
        if rescale is not None:
            drop = kwargs.get('drop', False)
            q.rescale(rescale, drop)
            if drop:
                # Record the dropped codes before the notation is built.
                view._kwargs['exclude'] = q.miss_x

        condition = view.spec_condition(link)
        q.summarize(stat=stat, margin=False, as_df=True)

        has_calc = bool(calc)
        if has_calc:
            q.calc(calc, result_only=True)
        # Calculated results carry the '.c:f' suffix in the method notation.
        method_nota = 'd.' + stat + ('.c:f' if has_calc else '')
        notation = view.notation(method_nota, condition)

        view.cbases = q.cbase
        view.rbases = q.rbase
        # Array results are transposed when the link's y-axis is '@'.
        if q.type == 'array' and link.y == '@':
            view.dataframe = q.result.T
        else:
            view.dataframe = q.result

        view._notation = notation
        view.translate_metric(set_value='meta')
        view._kwargs['exclude'] = q.miss_x
        link[notation] = view
예제 #3
0
    def frequency(self, link, name, kwargs):
        """
        Adds count-based views on a Link definition to the Stack object.

        ``frequency`` is able to compute several aggregates that are based on
        the count of code values in uni- or bivariate Links. This includes
        bases / sample sizes, raw or normalized cell frequencies and code
        summaries like simple and complex nets.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view. Some names switch the
            aggregation mode: ``'cbase_gross'`` (flags are ignored),
            ``'ebase'`` (effective base, forced onto the x-axis),
            ``'counts_sum'`` / ``'c%_sum'`` (raw sums) and
            ``'counts_cumsum'`` / ``'c%_cumsum'`` (cumulative sums).
        kwargs : dict
        Keyword arguments (specific)
        text : str, optional, default None
            Sets an optional label in the meta component of the view that is
            used when the view is passed into a Quantipy build (e.g. Excel,
            Powerpoint).
        logic : list of int, list of dicts or core.tools.view.logic operation
            If a list is passed this instructs a simple net of the codes given
            as int. Multiple nets can be generated via a list of dicts that
            map names to lists of ints. For complex logical statements,
            expressions are parsed to identify the qualifying rows in the
            data. For example::

                # simple net
                'logic': [1, 2, 3]

                # multiple nets/code groups
                'logic': [{'A': [1, 2]}, {'B': [3, 4]}, {'C': [5, 6]}]

                # code logic
                'logic': has_all([1, 2, 3])

        rel_to : str, optional
            Normalization target. Values other than ``''``, ``None``,
            ``'x'``, ``'y'`` and ``'counts_sum'`` are treated as a rebasing
            instruction: a value of the form ``'<base>.cells'`` or
            ``'<base>.y'`` is split on the dot, ``<base>`` is handed to the
            normalization and the ``cells`` suffix requests per-cell
            rebasing.
        combine : bool, optional, default False
            If truthy, ``expand`` is reset to None and ``complete`` to False,
            and any ``'expand'`` entry in the ``logic`` definitions is set
            to None.
        calc : optional
            If not None, it is applied to the aggregated result via
            ``Quantity.calc`` and the view is stored under the ``'f.c:f'``
            method notation.
        calc_only : bool, optional, default False
            Passed as ``result_only`` to ``Quantity.calc``.

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key. Nothing is stored for array-type
            quantities whose y is not ``'@'`` or if grouping raises
            ``NotImplementedError`` (a warning is issued instead).

        .. note:: Net codes take into account if a variable is
                  multi-coded. The net will therefore consider qualifying
                  cases and not the raw sum of the frequencies
                  per category, i.e. no multiple counting of cases.
        """
        view = View(link, name, kwargs=kwargs)
        axis, condition, rel_to, weights, text = view.get_std_params()
        logic, expand, complete, calc, exclude, rescale = view.get_edit_params(
        )
        # ====================================================================
        # This block of kwargs should be removed:
        # parameter overwriting should be done using the template,
        # NOT in QP core code!
        if kwargs.get('combine', False):
            view._kwargs['expand'], expand = None, None
            view._kwargs['complete'], complete = False, False
            if logic is not None:
                # NOTE(review): assumes every logic entry offers .keys()
                # (i.e. is dict-like) -- confirm for plain int lists.
                for no, logic_def in enumerate(logic):
                    if 'expand' in logic_def.keys():
                        logic_def['expand'] = None
                        logic[no] = logic_def
                view._kwargs['logic'] = logic
        # --------------------------------------------------------------------
        # This block resolves the rel_to argument in order to support rebased
        # % computations. The regular notation string is adjusted here, too.
        # We need to avoid the forced overwriting of the kwarg and use the
        # actual rel_to != 'x', 'y', 'counts_sum' string...
        per_cell = False
        if rel_to not in ['', None, 'x', 'y', 'counts_sum']:
            # The stored kwarg is forced to 'y'; the local rel_to keeps
            # driving the actual normalization below.
            view._kwargs['rel_to'] = 'y'
            rel_to_kind = rel_to.split('.')
            if len(rel_to_kind) == 2:
                rel_to = rel_to_kind[0]
                # A '.y' suffix (or any other) keeps per_cell False.
                per_cell = rel_to_kind[1] == 'cells'
            try:
                # Best effort: flag an already existing counts view of this
                # link as rebased. The view may simply not exist (yet).
                link['x|f|:||{}|counts'.format(
                    weights)]._kwargs['rebased'] = True
            except Exception:
                # Deliberately ignored, but no longer with a bare ``except``
                # that would also swallow KeyboardInterrupt / SystemExit.
                pass
        # ====================================================================
        # Only the 'cbase_gross' view asks the Quantity to ignore flags.
        ignore = name == 'cbase_gross'
        q = qp.Quantity(link, weights, ignore_flags=ignore)
        if q.type == 'array' and not q.y == '@':
            # Array-type quantities are only handled against the '@' y-axis.
            pass
        else:
            if q.leveled:
                # Leveled quantities are aggregated via Level, whose frame
                # becomes the view's dataframe directly.
                leveled = Level(q)
                if rel_to is not None:
                    leveled.percent()
                elif axis == 'x':
                    leveled.base()
                else:
                    leveled.count()
                view.dataframe = leveled.lvldf
            elif logic is not None:
                try:
                    q.group(groups=logic,
                            axis=axis,
                            expand=expand,
                            complete=complete)
                except NotImplementedError as e:
                    # Unsupported grouping: warn and skip this view.
                    warnings.warn('NotImplementedError: {}'.format(e))
                    return None
                q.count(axis=None, as_df=False, margin=False)
                condition = view.spec_condition(link, q.logical_conditions,
                                                expand)
            else:
                # Plain counting; the view name selects the counting mode.
                eff = name == 'ebase'
                raw = name in ['counts_sum', 'c%_sum']
                cum_sum = name in ['counts_cumsum', 'c%_cumsum']
                if cum_sum:
                    axis = None
                if eff:
                    axis = 'x'
                q.count(axis=axis,
                        raw_sum=raw,
                        effective=eff,
                        cum_sum=cum_sum,
                        margin=False,
                        as_df=False)
            if rel_to is not None:
                if q.type == 'array':
                    # Array quantities are always normalized on 'y'.
                    rel_to = 'y'
                q.normalize(rel_to, per_cell=per_cell)
            q.to_df()
            view.cbases = q.cbase
            view.rbases = q.rbase
            if calc is not None:
                calc_only = kwargs.get('calc_only', False)
                q.calc(calc, axis, result_only=calc_only)
            # Calculated results and (cumulative) sums share the 'f.c:f'
            # method notation; everything else is a plain 'f' view.
            if calc is not None or name in [
                    'counts_sum', 'c%_sum', 'counts_cumsum', 'c%_cumsum'
            ]:
                method_nota = 'f.c:f'
            else:
                method_nota = 'f'
            notation = view.notation(method_nota, condition)
            view._notation = notation
            if not q.leveled:
                # Array results are transposed when the link's y is '@'.
                if q.type == 'array' and link.y == '@':
                    view.dataframe = q.result.T
                else:
                    view.dataframe = q.result
            view._kwargs['exclude'] = q.miss_x

            link[notation] = view