Example #1
0
def parse(s, context, interactive=False):
    """Parse the expression string *s* into an AST bound to *context*.

    Builds the evaluation globals (booleans, nan/inf, the registered
    ``functions``, plus any caller-provided ``'__globals__'``) and stores
    them back into *context* so later evaluation sees the same namespace.

    Parameters
    ----------
    s : str
        Expression source to parse.
    context : dict
        Parsing context; modified in place (``'__globals__'`` is replaced).
    interactive : bool
        Forwarded to the underlying parser.

    Raises
    ------
    Exception
        Any parsing error, annotated with the offending source string.
    """
    globals_context = {'False': False,
                       'True': True,
                       'nan': float('nan'),
                       'inf': float('inf')}
    globals_context.update(functions)
    globals_context.update(context.get('__globals__', {}))
    # modify in-place so all holders of *context* share the merged globals
    context['__globals__'] = globals_context
    try:
        node = _parse(s, interactive=interactive)
        return to_ast(node, context)
    # "except E as e" works on Python 2.6+ and 3.x; the old comma form
    # ("except E, e") is a syntax error on Python 3 (see PEP 3110)
    except Exception as e:
        add_context(e, "while parsing: " + s)
        raise
Example #2
0
File: expr.py Project: gvk489/liam2
def expr_eval(expr, context):
    """Recursively evaluate *expr* in *context*.

    ``Expr`` instances are first checked for unknown variables, then
    evaluated; lists, tuples and slices are evaluated element-wise;
    any other object is assumed to already be a value and is returned
    unchanged.

    Raises
    ------
    Exception
        If a variable used by *expr* is neither a known global nor present
        in *context*, or if evaluation itself fails; the error message is
        annotated with the expression being evaluated.
    """
    try:
        if isinstance(expr, Expr):
            # assert isinstance(expr.__fields__, tuple)

            globals_data = context.global_tables
            if globals_data is not None:
                globals_names = set(globals_data.keys())
                if 'periodic' in globals_data:
                    globals_names |= set(globals_data['periodic'].dtype.names)
            else:
                globals_names = set()

            # FIXME: systematically checking for the presence of variables has a
            # non-negligible cost (especially in matching), even when caching
            # collect_variables result (it is much better than before though).
            # TODO: also check for globals
            for var in expr.collect_variables():
                if var.name not in globals_names and var not in context:
                    raise Exception("variable '%s' is unknown (it is either "
                                    "not defined or not computed yet)" % var)
            return expr.evaluate(context)

            # there are several flaws with this approach:
            # 1) I don't get action times (csv et al)
            # 2) these are cumulative times (they include child expr/processes)
            #    we might want to store the timings in a tree (based on call
            #    stack depth???) so that I could rebuild both cumulative and
            #    "real" timings.
            # 3) the sum of timings is wrong since children/nested expr times
            #    count both for themselves and for all their parents
    #        time, res = gettime(expr.evaluate, context)
    #        timings[expr.__class__.__name__] += time
    #        return res
        elif isinstance(expr, list):
            return [expr_eval(e, context) for e in expr]
        elif isinstance(expr, tuple):
            return tuple([expr_eval(e, context) for e in expr])
        elif isinstance(expr, slice):
            return slice(expr_eval(expr.start, context),
                         expr_eval(expr.stop, context),
                         expr_eval(expr.step, context))
        else:
            return expr
    # "as e" form is valid on Python 2.6+ and required on Python 3
    except Exception as e:
        add_context(e, "when evaluating: " + str(expr))
        raise
Example #3
0
    def evaluate(self, context):
#        FIXME: this cannot work, because dict.__contains__(k) calls k.__eq__
#        which has a non standard meaning
#        if self in expr_cache:
#            s = expr_cache[self]
#        else:
#            s = self.as_string(context)
#            expr_cache[self] = s

        simple_expr = self.as_simple_expr(context)
        if isinstance(simple_expr, Variable) and simple_expr.name in context:
            return context[simple_expr.name]

        # check for labeled arrays, to work around the fact that numexpr
        # does not preserve ndarray subclasses.

        # avoid checking for arrays types in the past, because that is a
        # costly operation (context[var_name] fetches the column from disk
        # in that case). This probably prevents us from doing stuff like
        # align(lag(groupby() / groupby())), but it is a limitation I can
        # live with to avoid hitting the disk twice for each disk access.

        #TODO: I should rewrite this whole mess when my "dtype" method
        # supports ndarrays and LabeledArray so that I can get the dtype from
        # the expression instead of from actual values.
        labels = None
        if isinstance(context, EntityContext) and context._is_array_period:
            for var_name in simple_expr.collect_variables(context):
                # var_name should always be in the context at this point
                # because missing temporaries should have been already caught
                # in expr_eval
                value = context[var_name]
                if isinstance(value, LabeledArray):
                    if labels is None:
                        labels = (value.dim_names, value.pvalues)
                    else:
                        if labels[0] != value.dim_names:
                            raise Exception('several arrays with inconsistent '
                                            'labels (dimension names) in the '
                                            'same expression: %s vs %s'
                                            % (labels[0], value.dim_names))
                        if not np.array_equal(labels[1], value.pvalues):
                            raise Exception('several arrays with inconsistent '
                                            'axis values in the same '
                                            'expression: \n%s\n\nvs\n\n%s'
                                            % (labels[1], value.pvalues))

        s = simple_expr.as_string()
        try:
            res = evaluate(s, context, {}, truediv='auto')
            if labels is not None:
                # This is a hack which relies on the fact that currently
                # all the expression we evaluate through numexpr preserve
                # array shapes, but if we ever use numexpr reduction
                # capabilities, we will be in trouble
                res = LabeledArray(res, labels[0], labels[1])
            return res
        except KeyError, e:
            raise add_context(e, s)
Example #4
0
def parse(s, globals_dict=None, conditional_context=None, interactive=False,
          autovariables=False):
    # Non-strings are assumed to already be parsed values and pass through
    # unchanged.
    if not isinstance(s, basestring):
        return s

    # this prevents any function named something ending in "if"
    str_to_parse = s.replace('if(', 'where(')
    try:
        # parse the rewritten source into a Python AST; failures are
        # annotated with the *original* (un-rewritten) string for clarity
        tree = ast.parse(str_to_parse)
    except Exception, e:
        raise add_context(e, s)
Example #5
0
def parse(s, globals=None, conditional_context=None, interactive=False,
          autovariables=False):
    # Non-strings are assumed to already be parsed values and pass through
    # unchanged.
    if not isinstance(s, basestring):
        return s

    # this prevents any function named something ending in "if"
    str_to_parse = s.replace('if(', 'where(')
    tree = ast.parse(str_to_parse)
    # rewrite boolean operators into their bitwise equivalents (numexpr-style)
    tree = BoolToBitTransformer().visit(tree)
    body = tree.body

    # disable for now because it is not very useful yet. To be useful, I need
    # to implement:
    # * Expr.__setitem__
    # * keep the same context across several expressions in the interactive
    #   console
#    if interactive:
    if False:
        if len(body) == 0:
            to_compile = []
        else:
            # if the last statement is an expression, move it out and
            # use eval() on it instead of exec
            if isinstance(body[-1], ast.Expr):
                to_compile = [('exec', ast.Module(body[:-1])),
                              ('eval', ast.Expression(body[-1].value))]
            else:
                to_compile = [('exec', tree)]
    else:
        # non-interactive path: exactly one expression statement is expected
        assert len(body) == 1 and isinstance(body[0], ast.Expr)
        to_compile = [('eval', ast.Expression(body[0].value))]

    try:
        # compile each AST to a code object, remembering its eval/exec mode
        to_eval = [(mode, compile(code, '<expr>', mode))
                   for mode, code in to_compile]
    except SyntaxError:
        # SyntaxError are clearer if left unmodified since they already contain
        # the faulty string

        # Instances of this class have attributes filename, lineno, offset and
        # text for easier access to the details. str() of the exception
        # instance returns only the message.
        raise
    except Exception, e:
        raise add_context(e, s)
Example #6
0
    def evaluate(self, context):
        """Evaluate this expression against *context* using numexpr.

        The expression is first reduced to a "simple" expression; a bare
        variable lookup is short-circuited to a direct context fetch.
        Because numexpr does not preserve ndarray subclasses, LabeledArray
        operands are detected up front (and checked for consistent labels)
        so the labels can be re-attached to the result afterwards.
        """
        # period = context.period
        #
        # if isinstance(period, np.ndarray):
        #     assert np.isscalar(period) or not period.shape
        #     period = int(period)

        # cache_key = (self, period, context.entity_name, context.filter_expr)
        # try:
        #     cached_result = expr_cache.get(cache_key, None)
        #     #FIXME: lifecycle functions should invalidate all variables!
        #     if cached_result is not None:
        #         return cached_result
        # except TypeError:
        #     # The cache_key failed to hash properly, so the expr is not
        #     # cacheable. It *should* be because of a not_hashable expr
        #     # somewhere within cache_key[3].
        #     cache_key = None

        simple_expr = self.as_simple_expr(context)
        if isinstance(simple_expr, Variable) and simple_expr.name in context:
            return context[simple_expr.name]

        # check for labeled arrays, to work around the fact that numexpr
        # does not preserve ndarray subclasses.

        # avoid checking for arrays types in the past, because that is a
        # costly operation (context[var_name] fetches the column from disk
        # in that case). This probably prevents us from doing stuff like
        # align(lag(groupby() / groupby())), but it is a limitation I can
        # live with to avoid hitting the disk twice for each disk access.

        #TODO: I should rewrite this whole mess when my "dtype" method
        # supports ndarrays and LabeledArray so that I can get the dtype from
        # the expression instead of from actual values.
        labels = None
        assert isinstance(context, EvaluationContext)
        local_ctx = context.entity_data
        if isinstance(local_ctx, EntityContext) and local_ctx.is_array_period:
            for var in simple_expr.collect_variables():
                assert var.entity is None or var.entity is context.entity, \
                    "should not have happened (as_simple_expr should " \
                    "have transformed non-local variables)"

                # var_name should always be in the context at this point
                # because missing temporaries should have been already caught
                # in expr_eval
                value = context[var.name]
                # value = local_ctx[var.name]
                if isinstance(value, LabeledArray):
                    if labels is None:
                        # remember the first labeled operand's labels
                        labels = (value.dim_names, value.pvalues)
                    else:
                        if labels[0] != value.dim_names:
                            raise Exception('several arrays with inconsistent '
                                            'labels (dimension names) in the '
                                            'same expression: %s vs %s'
                                            % (labels[0], value.dim_names))
                        # check that for each dimension the labels are the same
                        pvalues1, pvalues2 = labels[1], value.pvalues

                        # None pvalues are simply ignored. This can happen due
                        # to limitations in LabeledArray (should be lifted when
                        # we use LArray instead).
                        if pvalues1 is not None and pvalues2 is not None:
                            for labels1, labels2 in zip(pvalues1, pvalues2):
                                if not np.array_equal(labels1, labels2):
                                    raise Exception('several arrays with '
                                                    'inconsistent axis values '
                                                    'in the same expression: '
                                                    '\n%s\n\nvs\n\n%s'
                                                    % (labels1, labels2))

        s = simple_expr.as_string()
        try:
            res = evaluate(s, local_ctx, {'nan': float('nan')}, truediv='auto')
            if isinstance(res, np.ndarray) and not res.shape:
                # unwrap 0-d arrays to plain Python scalars. res.item() is the
                # portable spelling: np.asscalar was deprecated and removed in
                # NumPy 1.23.
                res = res.item()
            if labels is not None:
                # This is a hack which relies on the fact that currently
                # all the expression we evaluate through numexpr preserve
                # array shapes, but if we ever use numexpr reduction
                # capabilities, we will be in trouble
                res = LabeledArray(res, labels[0], labels[1])

            # if cache_key is not None:
            #     expr_cache[cache_key] = res
            #     if cached_result is not None:
            #         assert np.array_equal(res, cached_result), \
            #             "%s != %s" % (res, cached_result)
            return res
        # "as e" form is valid on Python 2.6+ and required on Python 3.
        # Leftover debugger invocation (import pdb; pdb.set_trace()) removed:
        # it would hang any non-interactive run that hit this handler.
        except KeyError as e:
            raise add_context(e, s)
Example #7
0
            # if the last statement is an expression, move it out and
            # use eval() on it instead of exec
            if isinstance(body[-1], ast.Expr):
                to_compile = [('exec', ast.Module(body[:-1])),
                              ('eval', ast.Expression(body[-1].value))]
            else:
                to_compile = [('exec', tree)]
    else:
        assert len(body) == 1 and isinstance(body[0], ast.Expr)
        to_compile = [('eval', ast.Expression(body[0].value))]

    try:
        to_eval = [(mode, compile(code, '<expr>', mode))
                   for mode, code in to_compile]
    except Exception, e:
        raise add_context(e, s)

    context = {'False': False,
               'True': True,
               'nan': float('nan')}

    if autovariables:
        for _, code in to_eval:
            varnames = code.co_names
            context.update((name, Variable(name)) for name in varnames)
#        context.update((name, Token(name)) for name in varnames)

    #FIXME: this whole conditional context feature is a huge hack.
    # It relies on the link target not having the same fields/links
    # than the local entity (or not using them).
    # A collision will only occur rarely but it will make it all the more