Esempio n. 1
0
    def compute(self, context, expr, filter=None, skip_na=True):
        """Return the mean of `expr`, optionally restricted by `filter`.

        When `filter` is given, `expr` is multiplied by it before being
        evaluated and the divisor is the sum of the filter. With `skip_na`
        set, rows for which `ispresent` is false are excluded from the
        divisor and the total is computed with `na_sum`.

        Returns float('nan') when the divisor is zero.
        """
        # FIXME: either take "contextual filter" into account here (by using
        # self._getfilter), or don't do it in sum & gini
        if filter is None:
            filter = True
        else:
            filter_var = self.add_tmp_var(context, filter)
            if getdtype(expr, context) is bool:
                # numexpr does not implement mul_bbb, so turn the boolean
                # expression into an integer one first (expr *= 1)
                expr = BinaryOp('*', expr, 1)
            # expr *= filter_values
            expr = BinaryOp('*', expr, filter_var)

        values = np.asarray(expr_eval(expr, context))

        if skip_na:
            # deliberately *not* an inplace operation: filter can be a
            # simple variable
            filter = filter & ispresent(values)

        numrows = len(values) if filter is True else np.sum(filter)
        if not numrows:
            return float('nan')

        total = na_sum(values) if skip_na else np.sum(values)
        return total / float(numrows)
Esempio n. 2
0
    def compute(self, context, expr):
        """Sum `expr` over every period before the current one.

        Walks backwards from `context.period - 1` down to the entity's
        `base_period`, fetching the ids and values of `expr` for each period
        and accumulating them at the row given by `context.id_to_rownum`.

        Returns an array of length `len(entity.array)` whose dtype is int for
        bool/int expressions and float for float expressions.
        """
        entity = context.entity

        baseperiod = entity.base_period
        period = context.period - 1

        typemap = {bool: int, int: int, float: float}
        res_type = typemap[getdtype(expr, context)]
        res_size = len(entity.array)

        sum_values = np.zeros(res_size, dtype=res_type)
        id_to_rownum = context.id_to_rownum
        while period >= baseperiod:
            ids, values = self.value_for_period(expr,
                                                period,
                                                context,
                                                fill=None)

            # filter out lines which are present because there was a value for
            # that individual at that period but not for that column
            acceptable_rows = hasvalue(values)
            acceptable_ids = ids[acceptable_rows]
            if len(acceptable_ids):
                acceptable_values = values[acceptable_rows]

                value_rows = id_to_rownum[acceptable_ids]

                # Allocate the per-period buffer with the result dtype: the
                # `np.float` alias no longer exists in recent NumPy releases,
                # and adding a float buffer in place into an integer
                # accumulator is rejected by NumPy's same-kind casting rule.
                period_value = np.zeros(res_size, dtype=res_type)
                # NOTE(review): safe_put presumably skips rows of individuals
                # that are no longer present -- confirm against its definition
                safe_put(period_value, value_rows, acceptable_values)

                sum_values += period_value
            period -= 1
        return sum_values
Esempio n. 3
0
    def compute(self, context, expr, filter=None, skip_na=True):
        """Compute the average of `expr` over the rows selected by `filter`.

        Without a filter every row counts. With `skip_na` enabled, rows
        where `ispresent` reports no value are dropped from the row count
        and `na_sum` is used for the numerator; otherwise `np.sum` is used.

        The result is float('nan') if no row is counted.
        """
        # FIXME: either take "contextual filter" into account here (by using
        # self._getfilter), or don't do it in sum & gini
        has_filter = filter is not None
        if has_filter:
            tmpvar = self.add_tmp_var(context, filter)
            if getdtype(expr, context) is bool:
                # mul_bbb is not implemented in numexpr, hence the conversion
                # of expr to int (expr *= 1)
                expr = BinaryOp('*', expr, 1)
            # expr *= filter_values
            expr = BinaryOp('*', expr, tmpvar)
        else:
            filter = True

        evaluated = expr_eval(expr, context)
        values = np.asarray(evaluated)

        if skip_na:
            # no inplace operation here because filter can be a simple
            # variable
            filter = filter & ispresent(values)

        if filter is True:
            numrows = len(values)
        else:
            numrows = np.sum(filter)

        if not numrows:
            return float('nan')
        if skip_na:
            return na_sum(values) / float(numrows)
        return np.sum(values) / float(numrows)
Esempio n. 4
0
    def compute(self, context, expr):
        """Accumulate the value of `expr` over all past periods.

        Iterates from `context.period - 1` back to `entity.base_period`. For
        each period, the ids and values returned by `self.value_for_period`
        are mapped to current rows through `context.id_to_rownum` and added
        to the running total.

        Returns an array with one entry per row of `entity.array`; its dtype
        is int for bool/int expressions and float for float expressions.
        """
        entity = context.entity

        baseperiod = entity.base_period
        period = context.period - 1

        typemap = {bool: int, int: int, float: float}
        res_type = typemap[getdtype(expr, context)]
        res_size = len(entity.array)

        sum_values = np.zeros(res_size, dtype=res_type)
        id_to_rownum = context.id_to_rownum
        while period >= baseperiod:
            ids, values = self.value_for_period(expr, period, context,
                                                fill=None)

            # filter out lines which are present because there was a value for
            # that individual at that period but not for that column
            acceptable_rows = hasvalue(values)
            acceptable_ids = ids[acceptable_rows]
            if len(acceptable_ids):
                acceptable_values = values[acceptable_rows]

                value_rows = id_to_rownum[acceptable_ids]

                # The buffer uses the result dtype rather than `np.float`:
                # that alias was removed from NumPy, and an in-place add of a
                # float array into an integer accumulator fails NumPy's
                # same-kind casting check.
                period_value = np.zeros(res_size, dtype=res_type)
                # NOTE(review): safe_put presumably ignores row numbers of
                # individuals that disappeared -- confirm with its definition
                safe_put(period_value, value_rows, acceptable_values)

                sum_values += period_value
            period -= 1
        return sum_values
Esempio n. 5
0
 def evaluate(self, context):
     """Count the rows of `context`, or those matching `self.filter`.

     Raises an Exception when the filter is not a boolean expression.
     """
     count_filter = self.filter
     if count_filter is None:
         return context_length(context)

     # TODO: check this at "compile" time (in __init__), though for
     # that we need to know the type of all temporary variables
     # first
     if getdtype(count_filter, context) is not bool:
         raise Exception("count filter must be a boolean expression")
     matching = expr_eval(count_filter, context)
     return np.sum(matching)
Esempio n. 6
0
 def _getfilter(self, context):
     ctx_filter = context.get('__filter__')
     if self.filter is not None and ctx_filter is not None:
         filter_expr = ctx_filter & self.filter
     elif self.filter is not None:
         filter_expr = self.filter
     elif ctx_filter is not None:
         filter_expr = ctx_filter
     else:
         filter_expr = None
     if filter_expr is not None and getdtype(filter_expr, context) is not bool:
         raise Exception("filter must be a boolean expression")
     return filter_expr
Esempio n. 7
0
 def _getfilter(context, filter):
     """Merge `filter` with the filter expression stored on `context`.

     If either side is the `not_hashable` marker, the other side is used
     as is. Otherwise the two are and-ed together when both are set, or
     whichever is set is returned (None when neither is). Raises an
     Exception if the resulting filter is not a boolean expression.
     """
     ctx_filter = context.filter_expr
     # FIXME: this is a hack and shows that the not_hashable filter_expr in
     # context is not really a good solution. We should rather add a flag
     # in the context "ishardsubset" or something like that.
     if filter is not_hashable:
         combined = ctx_filter
     elif ctx_filter is not_hashable or ctx_filter is None:
         # nothing usable on the context side: keep the explicit filter,
         # which may itself be None
         combined = filter
     elif filter is None:
         combined = ctx_filter
     else:
         combined = LogicalOp('&', ctx_filter, filter)

     if combined is not None and getdtype(combined, context) is not bool:
         raise Exception("filter must be a boolean expression")
     return combined
Esempio n. 8
0
 def _getfilter(context, filter):
     """Return the effective filter for `filter` within `context`.

     The `not_hashable` marker on one side means the other side is taken
     unchanged. Without the marker, two set filters are combined with a
     logical and, a single set filter is returned alone, and None is
     returned when there is no filter. A non-boolean result raises an
     Exception.
     """
     ctx_filter = context.filter_expr
     # FIXME: this is a hack and shows that the not_hashable filter_expr in
     #  context is not really a good solution. We should rather add a flag
     # in the context "ishardsubset" or something like that.
     if filter is not_hashable:
         result = ctx_filter
     elif ctx_filter is not_hashable:
         result = filter
     else:
         has_own = filter is not None
         has_ctx = ctx_filter is not None
         if has_own and has_ctx:
             result = LogicalOp('&', ctx_filter, filter)
         elif has_own:
             result = filter
         else:
             # either the context filter alone, or None when neither is set
             result = ctx_filter

     if result is None:
         return result
     if getdtype(result, context) is not bool:
         raise Exception("filter must be a boolean expression")
     return result
Esempio n. 9
0
    def evaluate(self, context):
        """Return the mean of `self.expr`, restricted by `self.filter` if set.

        The filter values are stored in a copy of `context` under a
        temporary variable name and multiplied into the expression. With
        `self.skip_na` set, rows where `ispresent` is false are left out of
        the divisor and `na_sum` is used for the total.

        Returns float('nan') when the divisor is zero.
        """
        expr = self.expr

        # FIXME: either take "contextual filter" into account here (by using
        # self._getfilter), or don't do it in sum & gini
        if self.filter is None:
            filter_values = True
        else:
            filter_values = expr_eval(self.filter, context)
            tmp_varname = get_tmp_varname()
            # work on a copy so the caller's context is not polluted with
            # the temporary variable
            context = context.copy()
            context[tmp_varname] = filter_values
            if getdtype(expr, context) is bool:
                # numexpr has no mul_bbb, so make the expression an int one
                # NOTE(review): augmented assignment -- if the expression
                # type implements __imul__ this would modify self.expr in
                # place; confirm it only defines __mul__
                expr *= 1
            expr *= Variable(tmp_varname)

        values = np.asarray(expr_eval(expr, context))

        skip_na = self.skip_na
        if skip_na:
            # deliberately *not* an inplace operation: filter_values can be
            # a simple variable
            filter_values = filter_values & ispresent(values)

        numrows = (len(values) if filter_values is True
                   else np.sum(filter_values))
        if not numrows:
            return float('nan')

        total = na_sum(values) if skip_na else np.sum(values)
        return total / float(numrows)
Esempio n. 10
0
 def dtype(self, context):
     """Return the dtype of `self.target_expr` in the target context."""
     ctx = self.target_context(context)
     return getdtype(self.target_expr, ctx)
Esempio n. 11
0
 def dtype(self, context):
     """Return the dtype of the first argument."""
     first_arg = self.args[0]
     return getdtype(first_arg, context)
Esempio n. 12
0
 def dtype(self, context):
     """Return int; `self.expr` is asserted to be a boolean expression."""
     expr_dtype = getdtype(self.expr, context)
     assert expr_dtype == bool
     return int
Esempio n. 13
0
 def dtype(self, context):
     """Return the result dtype: int for bool/int arguments, else float.

     Raises KeyError for an argument dtype other than bool, int or float.
     """
     # TODO: merge this typemap with tsum's
     arg_dtype = getdtype(self.args[0], context)
     return {bool: int, int: int, float: float}[arg_dtype]
Esempio n. 14
0
 def dtype(self, context):
     """Return the dtype of the first argument, asserted to be float."""
     # the result has the same dtype as the input
     arg_dtype = getdtype(self.args[0], context)
     assert arg_dtype == float
     return arg_dtype
Esempio n. 15
0
 def add_filter(expr, filter):
     """Wrap `expr` so that rows rejected by `filter` get the missing value.

     Returns `expr` unchanged when `filter` is None; otherwise returns a
     `Where` that yields `expr` where the filter holds and the missing
     value of the expression's dtype elsewhere.
     """
     if filter is None:
         return expr
     fill_value = missing_values[getdtype(expr, None)]
     return Where(filter, expr, fill_value)
Esempio n. 16
0
 def dtype(self, context):
     """Return the dtype of `self.choices`."""
     choices = self.choices
     return getdtype(choices, context)
Esempio n. 17
0
 def dtype(self, context):
     """Return the result dtype for `self.target_expr` in the target context.

     bool and int expressions map to int, float maps to float; any other
     dtype raises KeyError.
     """
     inner_context = self.target_context(context)
     expr_dtype = getdtype(self.target_expr, inner_context)
     # TODO: merge this typemap with the one in tsum
     promoted = {bool: int, int: int, float: float}
     return promoted[expr_dtype]
Esempio n. 18
0
 def dtype(self, context):
     """Map the first argument's dtype to the result dtype.

     bool and int give int, float gives float; other dtypes raise KeyError.
     """
     # TODO: merge this typemap with tsum's
     promoted = {bool: int, int: int, float: float}
     first_arg_dtype = getdtype(self.args[0], context)
     return promoted[first_arg_dtype]
Esempio n. 19
0
 def dtype(self, context):
     """Return int; the first argument is asserted to be boolean."""
     arg_dtype = getdtype(self.args[0], context)
     assert arg_dtype == bool
     return int
Esempio n. 20
0
 def add_filter(expr, filter):
     """Return `expr` masked by `filter`, or `expr` itself without a filter.

     With a filter, the result is a `Where` selecting `expr` where the
     filter holds and the missing value for the expression's dtype
     otherwise.
     """
     no_filter = filter is None
     if no_filter:
         return expr
     expr_dtype = getdtype(expr, None)
     return Where(filter, expr, missing_values[expr_dtype])
Esempio n. 21
0
 def dtype(self, context):
     """Return the coerced dtype of the two branches.

     `self.cond` is asserted to be a boolean expression.
     """
     cond_dtype = getdtype(self.cond, context)
     assert cond_dtype == bool
     return coerce_types(context, self.iftrue, self.iffalse)
Esempio n. 22
0
 def dtype(self, context):
     """Return int; `self.expr` is asserted to be a float expression."""
     expr_dtype = getdtype(self.expr, context)
     assert expr_dtype == float
     return int
Esempio n. 23
0
 def dtype(self, context):
     """Return the dtype obtained by coercing `iftrue` and `iffalse`.

     Asserts that the condition is a boolean expression first.
     """
     condition_dtype = getdtype(self.cond, context)
     assert condition_dtype == bool
     branches = (self.iftrue, self.iffalse)
     return coerce_types(context, *branches)
Esempio n. 24
0
 def add_filter(self, expr, context):
     """Mask `expr` with `self.filter`, if there is one.

     Returns `expr` unchanged when `self.filter` is None; otherwise returns
     a `Where` giving `expr` where the filter holds and the missing value
     of the expression's dtype elsewhere.
     """
     if self.filter is None:
         return expr
     fill_value = missing_values[getdtype(expr, context)]
     return Where(self.filter, expr, fill_value)
Esempio n. 25
0
 def dtype(self, context):
     """Return int after asserting that the first argument is boolean."""
     first_arg = self.args[0]
     assert getdtype(first_arg, context) == bool
     return int
Esempio n. 26
0
 def dtype(self, context):
     """Return the dtype of `self.expr1`."""
     first_expr = self.expr1
     return getdtype(first_expr, context)