def compute(self, context, expr, filter=None, skip_na=True):
    """Return the sum of the evaluated `expr` divided by the selected row count.

    When `filter` is given, `expr` is multiplied by it (through a temporary
    variable registered with ``self.add_tmp_var``) before evaluation, and
    the divisor is the number of rows kept by the filter. When `skip_na` is
    true, rows for which ``ispresent`` is false are also excluded from the
    divisor and the numerator is computed with ``na_sum``.

    Returns ``float('nan')`` when no row is selected.
    """
    # FIXME: either take "contextual filter" into account here (by using
    # self._getfilter), or don't do it in sum & gini
    if filter is None:
        filter = True
    else:
        filter_var = self.add_tmp_var(context, filter)
        if getdtype(expr, context) is bool:
            # convert expr to int because mul_bbb is not implemented in
            # numexpr (equivalent to: expr *= 1)
            expr = BinaryOp('*', expr, 1)
        # equivalent to: expr *= filter_values
        expr = BinaryOp('*', expr, filter_var)

    data = np.asarray(expr_eval(expr, context))

    if skip_na:
        # we should *not* use an inplace operation because filter can be a
        # simple variable
        filter = filter & ispresent(data)

    # filter is still the literal True only when there was neither a filter
    # nor any NA masking; every row counts in that case
    count = len(data) if filter is True else np.sum(filter)
    if not count:
        return float('nan')

    total = na_sum(data) if skip_na else np.sum(data)
    return total / float(count)
def compute(self, context, expr):
    """Accumulate the value of `expr` over all periods before the current one.

    Periods are visited backwards, from ``context.period - 1`` down to
    ``context.entity.base_period`` (inclusive). For each period the values
    returned by ``self.value_for_period`` are scattered to the rows given by
    ``context.id_to_rownum`` and added to the running total.

    Returns an array with ``len(entity.array)`` elements whose dtype is int
    for bool/int expressions and float for float expressions.

    Raises ``KeyError`` if the dtype of `expr` is not bool, int or float.
    """
    entity = context.entity
    baseperiod = entity.base_period
    period = context.period - 1

    typemap = {bool: int, int: int, float: float}
    res_type = typemap[getdtype(expr, context)]
    res_size = len(entity.array)

    sum_values = np.zeros(res_size, dtype=res_type)
    id_to_rownum = context.id_to_rownum
    while period >= baseperiod:
        ids, values = self.value_for_period(expr, period, context,
                                            fill=None)

        # filter out lines which are present because there was a value for
        # that individual at that period but not for that column
        acceptable_rows = hasvalue(values)
        acceptable_ids = ids[acceptable_rows]
        if len(acceptable_ids):
            acceptable_values = values[acceptable_rows]

            value_rows = id_to_rownum[acceptable_ids]

            # The scratch buffer shares the accumulator's dtype: the
            # `np.float` alias no longer exists in recent NumPy, and adding
            # a float buffer in place to an int accumulator is rejected by
            # NumPy's same-kind casting rule.
            period_value = np.zeros(res_size, dtype=res_type)
            safe_put(period_value, value_rows, acceptable_values)

            sum_values += period_value
        period -= 1
    return sum_values
def evaluate(self, context):
    """Return the number of rows in `context`, or the number matching the filter.

    Without a filter the result is ``context_length(context)``; otherwise it
    is the sum of the evaluated filter.

    Raises ``Exception`` if the filter's dtype is not bool.
    """
    if self.filter is None:
        return context_length(context)

    # TODO: check this at "compile" time (in __init__), though for
    # that we need to know the type of all temporary variables
    # first
    if getdtype(self.filter, context) is not bool:
        raise Exception("count filter must be a boolean expression")

    matches = expr_eval(self.filter, context)
    return np.sum(matches)
def _getfilter(self, context):
    """Combine ``self.filter`` with the context's ``'__filter__'`` entry.

    Returns the ``&`` of both when both are set, whichever one is set when
    only one is, and ``None`` when neither is.

    Raises ``Exception`` if the resulting filter's dtype is not bool.
    """
    ctx_filter = context.get('__filter__')

    if self.filter is None:
        # may itself be None, meaning "no filter at all"
        combined = ctx_filter
    elif ctx_filter is None:
        combined = self.filter
    else:
        combined = ctx_filter & self.filter

    if combined is None:
        return combined
    if getdtype(combined, context) is not bool:
        raise Exception("filter must be a boolean expression")
    return combined
def _getfilter(context, filter):
    """Combine `filter` with ``context.filter_expr`` into one filter expression.

    If either side is the ``not_hashable`` sentinel, the other side is
    returned alone. Otherwise both are joined with ``LogicalOp('&', ...)``
    when both are set, the one that is set is returned when only one is,
    and ``None`` is returned when neither is.

    Raises ``Exception`` if the resulting filter's dtype is not bool.
    """
    ctx_filter = context.filter_expr

    # FIXME: this is a hack and shows that the not_hashable filter_expr in
    # context is not really a good solution. We should rather add a flag
    # in the context "ishardsubset" or something like that.
    if filter is not_hashable:
        combined = ctx_filter
    elif ctx_filter is not_hashable:
        combined = filter
    elif filter is None:
        # may itself be None, meaning "no filter at all"
        combined = ctx_filter
    elif ctx_filter is None:
        combined = filter
    else:
        combined = LogicalOp('&', ctx_filter, filter)

    if combined is None:
        return combined
    if getdtype(combined, context) is not bool:
        raise Exception("filter must be a boolean expression")
    return combined
def _getfilter(context, filter):
    """Return the effective filter built from `filter` and the context filter.

    The ``not_hashable`` sentinel on either side makes the other side win
    unchanged. Otherwise, two set filters are and-ed together through
    ``LogicalOp('&', ctx_filter, filter)``, a single set filter is used
    as-is, and the result is ``None`` when there is no filter at all.

    Raises ``Exception`` if the resulting filter's dtype is not bool.
    """
    ctx_filter = context.filter_expr

    # FIXME: this is a hack and shows that the not_hashable filter_expr in
    # context is not really a good solution. We should rather add a flag
    # in the context "ishardsubset" or something like that.
    if filter is not_hashable:
        result = ctx_filter
    elif ctx_filter is not_hashable:
        result = filter
    elif filter is None:
        # covers both "only the context filter" and "no filter at all"
        result = ctx_filter
    elif ctx_filter is None:
        result = filter
    else:
        result = LogicalOp('&', ctx_filter, filter)

    is_invalid = (result is not None
                  and getdtype(result, context) is not bool)
    if is_invalid:
        raise Exception("filter must be a boolean expression")
    return result
def evaluate(self, context):
    """Return the sum of the evaluated ``self.expr`` divided by the row count.

    When ``self.filter`` is set, its evaluated values are stored under a
    temporary variable name in a copy of `context` and multiplied into the
    expression; the divisor is then the number of rows kept by the filter.
    When ``self.skip_na`` is true, rows for which ``ispresent`` is false are
    also excluded from the divisor and ``na_sum`` computes the numerator.

    Returns ``float('nan')`` when no row is selected.
    """
    expr = self.expr

    # FIXME: either take "contextual filter" into account here (by using
    # self._getfilter), or don't do it in sum & gini
    if self.filter is None:
        filter_values = True
    else:
        filter_values = expr_eval(self.filter, context)
        tmp_varname = get_tmp_varname()
        # work on a copy so the temporary variable does not leak into the
        # caller's context
        context = context.copy()
        context[tmp_varname] = filter_values
        if getdtype(expr, context) is bool:
            # convert expr to int because mul_bbb is not implemented in
            # numexpr
            expr *= 1
        expr *= Variable(tmp_varname)

    data = np.asarray(expr_eval(expr, context))

    if self.skip_na:
        # we should *not* use an inplace operation because filter_values
        # can be a simple variable
        filter_values = filter_values & ispresent(data)

    # filter_values is still the literal True only when there was neither a
    # filter nor any NA masking
    count = len(data) if filter_values is True else np.sum(filter_values)
    if not count:
        return float('nan')

    total = na_sum(data) if self.skip_na else np.sum(data)
    return total / float(count)
def dtype(self, context):
    """Return the dtype of ``self.target_expr`` looked up in the target context.

    The target context is obtained from ``self.target_context(context)``.
    """
    resolved_context = self.target_context(context)
    result = getdtype(self.target_expr, resolved_context)
    return result
def dtype(self, context):
    """Return the dtype of the first argument, as reported by ``getdtype``."""
    first_arg = self.args[0]
    return getdtype(first_arg, context)
def dtype(self, context):
    """Return ``int``; asserts that ``self.expr`` has a bool dtype."""
    expr_type = getdtype(self.expr, context)
    assert expr_type == bool
    return int
def dtype(self, context):
    """Return the result dtype derived from the first argument's dtype.

    bool and int arguments give ``int``; float arguments give ``float``.
    Raises ``KeyError`` for any other argument dtype.
    """
    # TODO: merge this typemap with tsum's
    result_types = {bool: int, int: int, float: float}
    arg_type = getdtype(self.args[0], context)
    return result_types[arg_type]
def dtype(self, context):
    """Return the first argument's dtype, which is asserted to be float."""
    # result dtype is the same as the input dtype
    arg_type = getdtype(self.args[0], context)
    assert arg_type == float
    return arg_type
def add_filter(expr, filter):
    """Return `expr` wrapped as ``Where(filter, expr, missing_value)``.

    ``missing_value`` is looked up in ``missing_values`` by the dtype of
    `expr` (resolved with a ``None`` context). When `filter` is ``None``,
    `expr` is returned unchanged.
    """
    if filter is None:
        return expr
    expr_type = getdtype(expr, None)
    fill_value = missing_values[expr_type]
    return Where(filter, expr, fill_value)
def dtype(self, context):
    """Return the dtype of ``self.choices``, as reported by ``getdtype``."""
    choices = self.choices
    return getdtype(choices, context)
def dtype(self, context):
    """Return the result dtype derived from the target expression's dtype.

    The dtype of ``self.target_expr`` is resolved in
    ``self.target_context(context)``. bool and int give ``int``; float gives
    ``float``. Raises ``KeyError`` for any other dtype.
    """
    resolved_context = self.target_context(context)
    expr_dtype = getdtype(self.target_expr, resolved_context)
    # TODO: merge this typemap with the one in tsum
    result_types = {bool: int, int: int, float: float}
    return result_types[expr_dtype]
def dtype(self, context):
    """Map the first argument's dtype to the dtype of the result.

    bool and int map to ``int``; float maps to ``float``. Any other dtype
    raises ``KeyError``.
    """
    # TODO: merge this typemap with tsum's
    typemap = {bool: int, int: int, float: float}
    input_type = getdtype(self.args[0], context)
    output_type = typemap[input_type]
    return output_type
def dtype(self, context):
    """Return ``int``; asserts that the first argument has a bool dtype."""
    arg_type = getdtype(self.args[0], context)
    assert arg_type == bool
    return int
def dtype(self, context):
    """Return the coerced dtype of the two branches.

    Asserts that ``self.cond`` has a bool dtype, then delegates to
    ``coerce_types(context, self.iftrue, self.iffalse)``.
    """
    cond_type = getdtype(self.cond, context)
    assert cond_type == bool
    branch_type = coerce_types(context, self.iftrue, self.iffalse)
    return branch_type
def dtype(self, context):
    """Return ``int``; asserts that ``self.expr`` has a float dtype."""
    expr_type = getdtype(self.expr, context)
    assert expr_type == float
    return int
def add_filter(self, expr, context):
    """Return `expr` wrapped as ``Where(self.filter, expr, missing_value)``.

    ``missing_value`` is looked up in ``missing_values`` by the dtype of
    `expr` in `context`. When ``self.filter`` is ``None``, `expr` is
    returned unchanged.
    """
    if self.filter is None:
        return expr
    expr_type = getdtype(expr, context)
    fill_value = missing_values[expr_type]
    return Where(self.filter, expr, fill_value)
def dtype(self, context):
    """Return the dtype of ``self.expr1``, as reported by ``getdtype``."""
    first_expr = self.expr1
    return getdtype(first_expr, context)