Beispiel #1
0
    def default(self, link, name, kwargs):
        """
        Adds a file meta dependent aggregation to a Stack.

        Checks the Link definition against the file meta and produces
        either a numerical or categorical summary tabulation including
        marginal the results.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key.
        """
        view = View(link, name, kwargs)
        pos, relation, rel_to, weights, text = view.get_std_params()
        meta = link.get_meta()
        categorical = ['single', 'delimited set']
        numeric = ['int', 'float']
        string = ['string']
        categorizable = categorical + numeric
        x_type, y_type, transpose = self._get_method_types(link)
        q = qp.Quantity(link, weight=weights)
        if q.type == 'array' and not q.y == '@':
            pass
        else:
            if link.y == '@':
                if x_type in categorical or x_type == 'array':
                    view_df = q.count().result
                elif x_type in numeric:
                    view_df = q.summarize().result
                    view_df.drop((link.x, 'All'), axis=0, inplace=True)
                elif x_type in string:
                    view_df = tools.view.agg.make_default_str_view(data,
                                                                   x=link.x)
            elif link.x == '@':
                if y_type in categorical:
                    view_df = q.count().result
                elif y_type in numeric:
                    view_df = q.summarize().result
                    view_df.drop((link.y, 'All'), axis=1, inplace=True)
            else:
                if x_type in categorical and y_type in categorizable:
                    view_df = q.count().result
                elif x_type in numeric and y_type in categorizable:
                    view_df = q.summarize().result
                    view_df.drop((link.x, 'All'), axis=0, inplace=True)
                    view_df.drop((link.y, 'All'), axis=1, inplace=True)
            notation = view.notation('default', ':')
            view.dataframe = view_df
            view._notation = notation
            link[notation] = view
Beispiel #2
0
    def descriptives(self, link, name, kwargs):
        """
        Adds num. distribution statistics of a Link defintion to the Stack.

        ``descriptives`` views can apply a range of summary statistics.
        Measures include statistics of centrality, dispersion and mass.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict
        Keyword arguments (specific)
        text : str, optional, default None
            Sets an optional label suffix for the meta component of the view
            which will be appended to the statistic name and used when the
            view is passed into a Quantipy build (e.g. Excel, Powerpoint).
        stats : str, default 'mean'
            The measure to compute.
        exclude : list of int
             Codes that will not be considered calculating the result.
        rescale : dict
            A mapping of {old code: new code}, e.g.::

                {
                 1: 0,
                 2: 25,
                 3: 50,
                 4: 75,
                 5: 100
                }
        drop : bool
            If ``rescale`` provides a new scale defintion, ``drop`` will remove
            all codes that are not transformed. Acts as a shorthand for manually
            passing any remaining codes in ``exclude``.

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key.
        """
        view = View(link, name, kwargs=kwargs)
        if not view._x['is_multi'] or kwargs.get('source'):
            view = View(link, name, kwargs=kwargs)
            axis, condition, rel_to, weights, text = view.get_std_params()
            logic, expand, complete, calc, exclude, rescale = view.get_edit_params(
            )
            stat = kwargs.get('stats', 'mean')
            view._kwargs['calc_only'] = True
            w = weights if weights is not None else None
            q = qp.Quantity(link, w)

            if kwargs.get('source', None):
                q = self._swap_and_rebase(q, kwargs['source'])
            if q.type == 'array' and not q.y == '@':
                pass
            else:
                if exclude is not None:
                    q.exclude(exclude, axis=axis)
                if rescale is not None:
                    drop = kwargs.get('drop', False)
                    q.rescale(rescale, drop)
                    if drop:
                        view._kwargs['exclude'] = q.miss_x
                condition = view.spec_condition(link)
                q.summarize(stat=stat, margin=False, as_df=True)
                if calc:
                    q.calc(calc, result_only=True)
                    method_nota = 'd.' + stat + '.c:f'
                else:
                    method_nota = 'd.' + stat
                notation = view.notation(method_nota, condition)
                view.cbases = q.cbase
                view.rbases = q.rbase
                if q.type == 'array':
                    view.dataframe = q.result.T if link.y == '@' else q.result
                else:
                    view.dataframe = q.result
                view._notation = notation
                view.translate_metric(set_value='meta')
                view._kwargs['exclude'] = q.miss_x
                link[notation] = view
Beispiel #3
0
    def frequency(self, link, name, kwargs):
        """
        Adds count-based views on a Link defintion to the Stack object.

        ``frequency`` is able to compute several aggregates that are based on
        the count of code values in uni- or bivariate Links. This includes
        bases / samples sizes, raw or normalized cell frequencies and code
        summaries like simple and complex nets.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict
        Keyword arguments (specific)
        text : str, optional, default None
            Sets an optional label in the meta component of the view that is
            used when the view is passed into a Quantipy build (e.g. Excel,
            Powerpoint).
        logic : list of int, list of dicts or core.tools.view.logic operation
            If a list is passed this instructs a simple net of the codes given
            as int. Multiple nets can be generated via a list of dicts that
            map names to lists of ints. For complex logical statements,
            expression are parsed to identify the qualifying rows in the data.
            For example::

                # simple net
                'logic': [1, 2, 3]

                # multiple nets/code groups
                'logic': [{'A': [1, 2]}, {'B': [3, 4]}, {'C', [5, 6]}]

                # code logic
                'logic': has_all([1, 2, 3])

        calc : TODO

        calc_only : TODO

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key.

        .. note:: Net codes take into account if a variable is
                  multi-coded. The net will therefore consider qualifying
                  cases and not the raw sum of the frequencies
                  per category, i.e. no multiple counting of cases.
        """
        view = View(link, name, kwargs=kwargs)
        axis, condition, rel_to, weights, text = view.get_std_params()
        logic, expand, complete, calc, exclude, rescale = view.get_edit_params(
        )
        # ====================================================================
        # this block of kwargs should be removed
        # parameter overwriting should be done using the template
        # NOT QP core code!
        if kwargs.get('combine', False):
            view._kwargs['expand'], expand = None, None
            view._kwargs['complete'], complete = False, False
            if logic is not None:
                for no, logic_def in enumerate(logic):
                    if 'expand' in logic_def.keys():
                        logic_def['expand'] = None
                        logic[no] = logic_def
                view._kwargs['logic'] = logic
        # ====================================================================
        w = weights if weights is not None else None
        ignore = True if name == 'cbase_gross' else False
        q = qp.Quantity(link, w, ignore_flags=ignore)
        if q.type == 'array' and not q.y == '@':
            pass
        else:
            if logic is not None:
                try:
                    q.group(groups=logic,
                            axis=axis,
                            expand=expand,
                            complete=complete)
                except NotImplementedError, e:
                    warnings.warn('NotImplementedError: {}'.format(e))
                    return None
                q.count(axis=None, as_df=False, margin=False)
                condition = view.spec_condition(link, q.logical_conditions,
                                                expand)
            else:
Beispiel #4
0
    def frequency(self, link, name, kwargs):
        """
        Adds count-based views on a Link defintion to the Stack object.

        ``frequency`` is able to compute several aggregates that are based on
        the count of code values in uni- or bivariate Links. This includes
        bases / samples sizes, raw or normalized cell frequencies and code
        summaries like simple and complex nets.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict
        Keyword arguments (specific)
        text : str, optional, default None
            Sets an optional label in the meta component of the view that is
            used when the view is passed into a Quantipy build (e.g. Excel,
            Powerpoint).
        logic : list of int, list of dicts or core.tools.view.logic operation
            If a list is passed this instructs a simple net of the codes given
            as int. Multiple nets can be generated via a list of dicts that
            map names to lists of ints. For complex logical statements,
            expression are parsed to identify the qualifying rows in the data.
            For example::

                # simple net
                'logic': [1, 2, 3]

                # multiple nets/code groups
                'logic': [{'A': [1, 2]}, {'B': [3, 4]}, {'C', [5, 6]}]

                # code logic
                'logic': has_all([1, 2, 3])

        calc : TODO

        calc_only : TODO

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key.

        .. note:: Net codes take into account if a variable is
                  multi-coded. The net will therefore consider qualifying
                  cases and not the raw sum of the frequencies
                  per category, i.e. no multiple counting of cases.
        """
        view = View(link, name, kwargs=kwargs)
        axis, condition, rel_to, weights, text = view.get_std_params()
        logic, expand, complete, calc, exclude, rescale = view.get_edit_params(
        )
        # ====================================================================
        # This block of kwargs should be removed
        # parameter overwriting should be done using the template
        # NOT QP core code!
        if kwargs.get('combine', False):
            view._kwargs['expand'], expand = None, None
            view._kwargs['complete'], complete = False, False
            if logic is not None:
                for no, logic_def in enumerate(logic):
                    if 'expand' in list(logic_def.keys()):
                        logic_def['expand'] = None
                        logic[no] = logic_def
                view._kwargs['logic'] = logic
        # --------------------------------------------------------------------
        # This block of code resolves the rel_to arg. in order to be able to use
        # rebased % computations. We are also adjusting for the regular notation
        # string here...
        # We need to avoid the forced overwriting of the kwarg and use the actual
        # rel_to != 'x', 'y', 'counts_sum' string...
        per_cell = False
        if not rel_to in ['', None, 'x', 'y', 'counts_sum']:
            view._kwargs['rel_to'] = 'y'
            rel_to_kind = rel_to.split('.')
            if len(rel_to_kind) == 2:
                rel_to = rel_to_kind[0]
                if rel_to_kind[1] == 'cells':
                    per_cell = True
                elif rel_to_kind[1] == 'y':
                    per_cell = False
            try:
                link['x|f|:||{}|counts'.format(
                    weights)]._kwargs['rebased'] = True
            except:
                pass
        # ====================================================================
        w = weights if weights is not None else None
        ignore = True if name == 'cbase_gross' else False
        q = qp.Quantity(link, w, ignore_flags=ignore)
        if q.type == 'array' and not q.y == '@':
            pass
        else:
            if q.leveled:
                leveled = Level(q)
                if rel_to is not None:
                    leveled.percent()
                elif axis == 'x':
                    leveled.base()
                else:
                    leveled.count()
                view.dataframe = leveled.lvldf
            elif logic is not None:
                try:
                    q.group(groups=logic,
                            axis=axis,
                            expand=expand,
                            complete=complete)
                except NotImplementedError as e:
                    warnings.warn('NotImplementedError: {}'.format(e))
                    return None
                q.count(axis=None, as_df=False, margin=False)
                condition = view.spec_condition(link, q.logical_conditions,
                                                expand)
            else:
                eff = True if name == 'ebase' else False
                raw = True if name in ['counts_sum', 'c%_sum'] else False
                cum_sum = True if name in ['counts_cumsum', 'c%_cumsum'
                                           ] else False
                if cum_sum: axis = None
                if eff: axis = 'x'
                q.count(axis=axis,
                        raw_sum=raw,
                        effective=eff,
                        cum_sum=cum_sum,
                        margin=False,
                        as_df=False)
            if rel_to is not None:
                if q.type == 'array':
                    rel_to = 'y'
                q.normalize(rel_to, per_cell=per_cell)
            q.to_df()
            view.cbases = q.cbase
            view.rbases = q.rbase
            if calc is not None:
                calc_only = kwargs.get('calc_only', False)
                q.calc(calc, axis, result_only=calc_only)
            if calc is not None or name in [
                    'counts_sum', 'c%_sum', 'counts_cumsum', 'c%_cumsum'
            ]:
                method_nota = 'f.c:f'
            else:
                method_nota = 'f'
            notation = view.notation(method_nota, condition)
            view._notation = notation
            if not q.leveled:
                if q.type == 'array':
                    view.dataframe = q.result.T if link.y == '@' else q.result
                else:
                    view.dataframe = q.result
            view._kwargs['exclude'] = q.miss_x

            link[notation] = view
Beispiel #5
0
    def descriptives(self, link, name, kwargs):
        """
        Adds num. distribution statistics of a Link defintion to the Stack.

        ``descriptives`` views can apply a range of summary statistics.
        Measures include statistics of centrality, dispersion and mass.
        
        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict
        Keyword arguments (specific)
        text : str, optional, default None
            Sets an optional label suffix for the meta component of the view
            which will be appended to the statistic name and used when the
            view is passed into a Quantipy build (e.g. Excel, Powerpoint). 
        exclude : list of int
             Codes that will not be considered calculating the result.
        rescale : dict
            A mapping of {old code: new code}, e.g.::

                {
                 1: 0,
                 2: 25,
                 3: 50,
                 4: 75,
                 5: 100
                }

        stats : str, default 'mean'
            The measure to compute.

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key.
        """
        view = View(link, kwargs=kwargs)
        if not view._x['is_multi']:
            func_name = 'descriptives'
            func_type = 'distribution statistics'
            pos, relation, rel_to, weights, text = view.std_params()

            stat = kwargs.get('stats', 'mean')
            exclude = view.missing()
            rescale = view.rescaling()
            q = qp.Quantity(link, weights)

            if exclude is not None:
                q = q.missingfy(exclude, keep_base=False)
            if rescale is not None:
                q = q.rescale(rescale)
            view.fulltext_for_stat(stat)
            relation = view.spec_relation(link)
            view_df = q.describe(show=stat, margin=False, as_df=True)
            notation = view.notation(stat, name, relation)
            view.cbases = view_df.cbase
            view.rbases = view_df.rbase
            view.dataframe = view_df.result
            view.name = notation
            link[notation] = view
Beispiel #6
0
    def frequency(self, link, name, kwargs):
        """
        Adds count-based views on a Link defintion to the Stack object.

        ``frequency`` is able to compute several aggregates that are based on
        the count of code values in uni- or bivariate Links. This includes
        bases / samples sizes, raw or normalized cell frequencies and code
        summaries like simple and complex nets.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict
        Keyword arguments (specific)
        text : str, optional, default None
            Sets an optional label in the meta component of the view that is
            used when the view is passed into a Quantipy build (e.g. Excel,
            Powerpoint).
        logic : list of int, list of dicts or core.tools.view.logic operation
            If a list is passed this instructs a simple net of the codes given
            as int. Multiple nets can be generated via a list of dicts that
            map names to lists of ints. For complex logical statements,
            expression are parsed to identify the qualifying rows in the data.
            For example::

                # simple net
                'logic': [1, 2, 3]

                # multiple nets/code groups
                'logic': [{'A': [1, 2]}, {'B': [3, 4]}, {'C', [5, 6]}]
                
                # code logic
                'logic': has_all([1, 2, 3])         

        calc : TODO

        calc_only : TODO

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key.

        .. note:: Net codes take into account if a variable is
                  multi-coded. The net will therefore consider qualifying
                  cases and not the raw sum of the frequencies
                  per category, i.e. no multiple counting of cases.
        """
        func_name = 'frequency'
        func_type = 'countbased'
        view = View(link, kwargs=kwargs)
        pos, relation, rel_to, weights, text = view.std_params()
        q = qp.Quantity(link, weights)
        logic = kwargs.get('logic', None)
        calc = kwargs.get('calc', None)
        val_name = None

        if name in ['ebase', 'cbase', 'rbase']:
            freq = q.count(name, margin=False, as_df=False)
        elif name in ['counts', 'c%', 'r%']:
            freq = q.count('freq', margin=False, as_df=False)
        elif logic:
            if isinstance(logic, list):
                if not isinstance(logic[0], dict):
                    val_name = name
                if calc:
                    calc_only = kwargs.get('calc_only', False)
                else:
                    calc_only = False
                freq = q.combine(logic,
                                 op=calc,
                                 op_only=calc_only,
                                 margin=False,
                                 as_df=False)
                relation = view.spec_relation()
            else:
                val_name = name
                casedata = link.get_data().copy()
                idx, relation = tools.view.logic.get_logic_index(
                    casedata[link.x], logic, casedata)
                filtered_q = qp.Quantity(link, weights, idx)
                freq = filtered_q.combine(margin=False, as_df=False)
        view.cbases = freq.cbase
        view.rbases = freq.rbase
        if rel_to is not None:
            base = 'col' if rel_to == 'y' else 'row'
            freq = freq.normalize(base)
        view_df = freq.to_df(val_name).result
        notation = view.notation(func_name, name, relation)
        view.name = notation
        view.dataframe = view_df
        link[notation] = view