def default(self, link, name, kwargs):
    """
    Adds a file meta dependent aggregation to a Stack.

    Checks the Link definition against the file meta and produces
    either a numerical or categorical summary tabulation including
    the marginal results.

    Parameters
    ----------
    link : Quantipy Link object.
    name : str
        The shortname applied to the view.
    kwargs : dict

    Returns
    -------
    None
        Adds requested View to the Stack, storing it under the full
        view name notation key.

    .. note:: Type combinations not covered by any branch below leave
        ``view_df`` unassigned, so storing the view raises an
        ``UnboundLocalError`` for them.
    """
    view = View(link, name, kwargs)
    pos, relation, rel_to, weights, text = view.get_std_params()

    categorical = ['single', 'delimited set']
    numeric = ['int', 'float']
    string = ['string']
    categorizable = categorical + numeric

    x_type, y_type, transpose = self._get_method_types(link)
    q = qp.Quantity(link, weight=weights)

    # Array Quantities are only aggregated against the '@' y-axis;
    # any other combination produces no view at all.
    if q.type == 'array' and not q.y == '@':
        return

    if link.y == '@':
        if x_type in categorical or x_type == 'array':
            view_df = q.count().result
        elif x_type in numeric:
            view_df = q.summarize().result
            view_df.drop((link.x, 'All'), axis=0, inplace=True)
        elif x_type in string:
            # The data is taken from the Link itself; no ``data`` name
            # exists in this scope.
            view_df = tools.view.agg.make_default_str_view(
                link.get_data(), x=link.x)
    elif link.x == '@':
        if y_type in categorical:
            view_df = q.count().result
        elif y_type in numeric:
            view_df = q.summarize().result
            view_df.drop((link.y, 'All'), axis=1, inplace=True)
    else:
        if x_type in categorical and y_type in categorizable:
            view_df = q.count().result
        elif x_type in numeric and y_type in categorizable:
            view_df = q.summarize().result
            view_df.drop((link.x, 'All'), axis=0, inplace=True)
            view_df.drop((link.y, 'All'), axis=1, inplace=True)

    notation = view.notation('default', ':')
    view.dataframe = view_df
    view._notation = notation
    link[notation] = view
def descriptives(self, link, name, kwargs):
    """
    Adds num. distribution statistics of a Link definition to the Stack.

    ``descriptives`` views can apply a range of summary statistics.
    Measures include statistics of centrality, dispersion and mass.

    Parameters
    ----------
    link : Quantipy Link object.
    name : str
        The shortname applied to the view.
    kwargs : dict
        Keyword arguments (specific)
    text : str, optional, default None
        Sets an optional label suffix for the meta component of the
        view which will be appended to the statistic name and used when
        the view is passed into a Quantipy build (e.g. Excel,
        Powerpoint).
    stats : str, default 'mean'
        The measure to compute.
    exclude : list of int
        Codes that will not be considered calculating the result.
    rescale : dict
        A mapping of {old code: new code}, e.g.::

            {1: 0, 2: 25, 3: 50, 4: 75, 5: 100}
    drop : bool
        If ``rescale`` provides a new scale definition, ``drop`` will
        remove all codes that are not transformed. Acts as a shorthand
        for manually passing any remaining codes in ``exclude``.

    Returns
    -------
    None
        Adds requested View to the Stack, storing it under the full
        view name notation key.
    """
    view = View(link, name, kwargs=kwargs)

    # Multi-coded x variables are only processed when a 'source' is given.
    if view._x['is_multi'] and not kwargs.get('source'):
        return

    view = View(link, name, kwargs=kwargs)
    axis, condition, rel_to, weights, text = view.get_std_params()
    (logic, expand, complete,
     calc, exclude, rescale) = view.get_edit_params()
    stat = kwargs.get('stats', 'mean')
    view._kwargs['calc_only'] = True

    q = qp.Quantity(link, weights)
    source = kwargs.get('source', None)
    if source:
        q = self._swap_and_rebase(q, source)

    # Array Quantities are only aggregated against the '@' y-axis.
    if q.type == 'array' and not q.y == '@':
        return

    if exclude is not None:
        q.exclude(exclude, axis=axis)
    if rescale is not None:
        drop = kwargs.get('drop', False)
        q.rescale(rescale, drop)
        if drop:
            view._kwargs['exclude'] = q.miss_x

    condition = view.spec_condition(link)
    q.summarize(stat=stat, margin=False, as_df=True)

    # A calculation on top of the statistic extends the method notation.
    if calc:
        q.calc(calc, result_only=True)
        nota_suffix = '.c:f'
    else:
        nota_suffix = ''
    method_nota = 'd.' + stat + nota_suffix
    notation = view.notation(method_nota, condition)

    view.cbases = q.cbase
    view.rbases = q.rbase
    # Array results are transposed when aggregated against '@'.
    needs_transpose = q.type == 'array' and link.y == '@'
    view.dataframe = q.result.T if needs_transpose else q.result
    view._notation = notation
    view.translate_metric(set_value='meta')
    view._kwargs['exclude'] = q.miss_x
    link[notation] = view
def frequency(self, link, name, kwargs): """ Adds count-based views on a Link defintion to the Stack object. ``frequency`` is able to compute several aggregates that are based on the count of code values in uni- or bivariate Links. This includes bases / samples sizes, raw or normalized cell frequencies and code summaries like simple and complex nets. Parameters ---------- link : Quantipy Link object. name : str The shortname applied to the view. kwargs : dict Keyword arguments (specific) text : str, optional, default None Sets an optional label in the meta component of the view that is used when the view is passed into a Quantipy build (e.g. Excel, Powerpoint). logic : list of int, list of dicts or core.tools.view.logic operation If a list is passed this instructs a simple net of the codes given as int. Multiple nets can be generated via a list of dicts that map names to lists of ints. For complex logical statements, expression are parsed to identify the qualifying rows in the data. For example:: # simple net 'logic': [1, 2, 3] # multiple nets/code groups 'logic': [{'A': [1, 2]}, {'B': [3, 4]}, {'C', [5, 6]}] # code logic 'logic': has_all([1, 2, 3]) calc : TODO calc_only : TODO Returns ------- None Adds requested View to the Stack, storing it under the full view name notation key. .. note:: Net codes take into account if a variable is multi-coded. The net will therefore consider qualifying cases and not the raw sum of the frequencies per category, i.e. no multiple counting of cases. """ view = View(link, name, kwargs=kwargs) axis, condition, rel_to, weights, text = view.get_std_params() logic, expand, complete, calc, exclude, rescale = view.get_edit_params( ) # ==================================================================== # this block of kwargs should be removed # parameter overwriting should be done using the template # NOT QP core code! 
if kwargs.get('combine', False): view._kwargs['expand'], expand = None, None view._kwargs['complete'], complete = False, False if logic is not None: for no, logic_def in enumerate(logic): if 'expand' in logic_def.keys(): logic_def['expand'] = None logic[no] = logic_def view._kwargs['logic'] = logic # ==================================================================== w = weights if weights is not None else None ignore = True if name == 'cbase_gross' else False q = qp.Quantity(link, w, ignore_flags=ignore) if q.type == 'array' and not q.y == '@': pass else: if logic is not None: try: q.group(groups=logic, axis=axis, expand=expand, complete=complete) except NotImplementedError, e: warnings.warn('NotImplementedError: {}'.format(e)) return None q.count(axis=None, as_df=False, margin=False) condition = view.spec_condition(link, q.logical_conditions, expand) else:
def frequency(self, link, name, kwargs):
    """
    Adds count-based views on a Link definition to the Stack object.

    ``frequency`` is able to compute several aggregates that are based
    on the count of code values in uni- or bivariate Links. This
    includes bases / samples sizes, raw or normalized cell frequencies
    and code summaries like simple and complex nets.

    Parameters
    ----------
    link : Quantipy Link object.
    name : str
        The shortname applied to the view.
    kwargs : dict
        Keyword arguments (specific)
    text : str, optional, default None
        Sets an optional label in the meta component of the view that
        is used when the view is passed into a Quantipy build
        (e.g. Excel, Powerpoint).
    logic : list of int, list of dicts or core.tools.view.logic operation
        If a list is passed this instructs a simple net of the codes
        given as int. Multiple nets can be generated via a list of
        dicts that map names to lists of ints. For complex logical
        statements, expression are parsed to identify the qualifying
        rows in the data. For example::

            # simple net
            'logic': [1, 2, 3]

            # multiple nets/code groups
            'logic': [{'A': [1, 2]}, {'B': [3, 4]}, {'C': [5, 6]}]

            # code logic
            'logic': has_all([1, 2, 3])

    calc : TODO
    calc_only : TODO

    Returns
    -------
    None
        Adds requested View to the Stack, storing it under the full
        view name notation key. Nothing is stored if grouping by
        ``logic`` raises ``NotImplementedError`` (a warning is issued
        instead).

    .. note:: Net codes take into account if a variable is
        multi-coded. The net will therefore consider qualifying
        cases and not the raw sum of the frequencies
        per category, i.e. no multiple counting of cases.
    """
    view = View(link, name, kwargs=kwargs)
    axis, condition, rel_to, weights, text = view.get_std_params()
    (logic, expand, complete,
     calc, exclude, rescale) = view.get_edit_params()

    # ====================================================================
    # This block of kwargs should be removed
    # parameter overwriting should be done using the template
    # NOT QP core code!
    if kwargs.get('combine', False):
        view._kwargs['expand'], expand = None, None
        view._kwargs['complete'], complete = False, False
        if logic is not None:
            for no, logic_def in enumerate(logic):
                if 'expand' in logic_def.keys():
                    logic_def['expand'] = None
                    logic[no] = logic_def
            view._kwargs['logic'] = logic
    # --------------------------------------------------------------------
    # This block of code resolves the rel_to arg. in order to be able to
    # use rebased % computations. We are also adjusting for the regular
    # notation string here...
    # We need to avoid the forced overwriting of the kwarg and use the
    # actual rel_to != 'x', 'y', 'counts_sum' string...
    per_cell = False
    if rel_to not in ['', None, 'x', 'y', 'counts_sum']:
        view._kwargs['rel_to'] = 'y'
        rel_to_kind = rel_to.split('.')
        if len(rel_to_kind) == 2:
            rel_to = rel_to_kind[0]
            if rel_to_kind[1] == 'cells':
                per_cell = True
            elif rel_to_kind[1] == 'y':
                per_cell = False
        # Best-effort flagging of the counts view stored on the link as
        # rebased. Failures are deliberately ignored, but interpreter
        # exits (KeyboardInterrupt / SystemExit) are no longer swallowed.
        try:
            counts_key = 'x|f|:||{}|counts'.format(weights)
            link[counts_key]._kwargs['rebased'] = True
        except Exception:
            pass
    # ====================================================================

    ignore = name == 'cbase_gross'
    q = qp.Quantity(link, weights, ignore_flags=ignore)

    # Array Quantities are only aggregated against the '@' y-axis.
    if q.type == 'array' and not q.y == '@':
        return None

    if q.leveled:
        leveled = Level(q)
        if rel_to is not None:
            leveled.percent()
        elif axis == 'x':
            leveled.base()
        else:
            leveled.count()
        view.dataframe = leveled.lvldf
    elif logic is not None:
        try:
            q.group(groups=logic, axis=axis, expand=expand,
                    complete=complete)
        except NotImplementedError as e:
            warnings.warn('NotImplementedError: {}'.format(e))
            return None
        q.count(axis=None, as_df=False, margin=False)
        condition = view.spec_condition(link, q.logical_conditions, expand)
    else:
        eff = name == 'ebase'
        raw = name in ['counts_sum', 'c%_sum']
        cum_sum = name in ['counts_cumsum', 'c%_cumsum']
        if cum_sum:
            axis = None
        # The effective base takes precedence over the cumulative setting.
        if eff:
            axis = 'x'
        q.count(axis=axis, raw_sum=raw, effective=eff, cum_sum=cum_sum,
                margin=False, as_df=False)

    if rel_to is not None:
        if q.type == 'array':
            rel_to = 'y'
        q.normalize(rel_to, per_cell=per_cell)
    q.to_df()
    view.cbases = q.cbase
    view.rbases = q.rbase

    if calc is not None:
        calc_only = kwargs.get('calc_only', False)
        q.calc(calc, axis, result_only=calc_only)

    sum_names = ['counts_sum', 'c%_sum', 'counts_cumsum', 'c%_cumsum']
    if calc is not None or name in sum_names:
        method_nota = 'f.c:f'
    else:
        method_nota = 'f'
    notation = view.notation(method_nota, condition)
    view._notation = notation

    # Leveled results already had their dataframe assigned above.
    if not q.leveled:
        if q.type == 'array':
            view.dataframe = q.result.T if link.y == '@' else q.result
        else:
            view.dataframe = q.result
    view._kwargs['exclude'] = q.miss_x
    link[notation] = view
def descriptives(self, link, name, kwargs):
    """
    Adds num. distribution statistics of a Link definition to the Stack.

    ``descriptives`` views can apply a range of summary statistics.
    Measures include statistics of centrality, dispersion and mass.
    Nothing is computed or stored when the view's x variable is
    flagged as multi-coded.

    Parameters
    ----------
    link : Quantipy Link object.
    name : str
        The shortname applied to the view.
    kwargs : dict
        Keyword arguments (specific)
    text : str, optional, default None
        Sets an optional label suffix for the meta component of the
        view which will be appended to the statistic name and used when
        the view is passed into a Quantipy build (e.g. Excel,
        Powerpoint).
    exclude : list of int
        Codes that will not be considered calculating the result.
    rescale : dict
        A mapping of {old code: new code}, e.g.::

            {1: 0, 2: 25, 3: 50, 4: 75, 5: 100}
    stats : str, default 'mean'
        The measure to compute.

    Returns
    -------
    None
        Adds requested View to the Stack, storing it under the full
        view name notation key.
    """
    view = View(link, kwargs=kwargs)
    if view._x['is_multi']:
        return

    pos, relation, rel_to, weights, text = view.std_params()
    stat = kwargs.get('stats', 'mean')
    missing_codes = view.missing()
    new_scale = view.rescaling()

    # Each transformation returns the Quantity to continue working with.
    quantity = qp.Quantity(link, weights)
    if missing_codes is not None:
        quantity = quantity.missingfy(missing_codes, keep_base=False)
    if new_scale is not None:
        quantity = quantity.rescale(new_scale)

    view.fulltext_for_stat(stat)
    relation = view.spec_relation(link)
    described = quantity.describe(show=stat, margin=False, as_df=True)
    notation = view.notation(stat, name, relation)

    view.cbases = described.cbase
    view.rbases = described.rbase
    view.dataframe = described.result
    view.name = notation
    link[notation] = view
def frequency(self, link, name, kwargs):
    """
    Adds count-based views on a Link definition to the Stack object.

    ``frequency`` is able to compute several aggregates that are based
    on the count of code values in uni- or bivariate Links. This
    includes bases / samples sizes, raw or normalized cell frequencies
    and code summaries like simple and complex nets.

    Parameters
    ----------
    link : Quantipy Link object.
    name : str
        The shortname applied to the view.
    kwargs : dict
        Keyword arguments (specific)
    text : str, optional, default None
        Sets an optional label in the meta component of the view that
        is used when the view is passed into a Quantipy build
        (e.g. Excel, Powerpoint).
    logic : list of int, list of dicts or core.tools.view.logic operation
        If a list is passed this instructs a simple net of the codes
        given as int. Multiple nets can be generated via a list of
        dicts that map names to lists of ints. For complex logical
        statements, expression are parsed to identify the qualifying
        rows in the data. For example::

            # simple net
            'logic': [1, 2, 3]

            # multiple nets/code groups
            'logic': [{'A': [1, 2]}, {'B': [3, 4]}, {'C': [5, 6]}]

            # code logic
            'logic': has_all([1, 2, 3])

    calc : TODO
    calc_only : TODO

    Returns
    -------
    None
        Adds requested View to the Stack, storing it under the full
        view name notation key.

    .. note:: Net codes take into account if a variable is
        multi-coded. The net will therefore consider qualifying
        cases and not the raw sum of the frequencies
        per category, i.e. no multiple counting of cases.
    """
    view = View(link, kwargs=kwargs)
    pos, relation, rel_to, weights, text = view.std_params()
    q = qp.Quantity(link, weights)
    logic = kwargs.get('logic', None)
    calc = kwargs.get('calc', None)
    label = None

    if name in ('ebase', 'cbase', 'rbase'):
        # Base views are counted under their own name.
        freq = q.count(name, margin=False, as_df=False)
    elif name in ('counts', 'c%', 'r%'):
        freq = q.count('freq', margin=False, as_df=False)
    elif logic and isinstance(logic, list):
        # Plain code lists are labelled with the view's shortname;
        # lists of dicts are not.
        if not isinstance(logic[0], dict):
            label = name
        calc_only = kwargs.get('calc_only', False) if calc else False
        freq = q.combine(logic, op=calc, op_only=calc_only,
                         margin=False, as_df=False)
        relation = view.spec_relation()
    elif logic:
        # Non-list logic is resolved into a row index on a copy of the
        # link's data, which a second Quantity is then built from.
        label = name
        casedata = link.get_data().copy()
        idx, relation = tools.view.logic.get_logic_index(
            casedata[link.x], logic, casedata)
        freq = qp.Quantity(link, weights, idx).combine(
            margin=False, as_df=False)

    view.cbases = freq.cbase
    view.rbases = freq.rbase
    if rel_to is not None:
        freq = freq.normalize('col' if rel_to == 'y' else 'row')

    result_df = freq.to_df(label).result
    notation = view.notation('frequency', name, relation)
    view.name = notation
    view.dataframe = result_df
    link[notation] = view