def parse(s, context, interactive=False):
    """Parse *s* and convert the result to an AST using *context*.

    The namespace used for parsing is assembled from, in increasing order of
    priority: a few literal constants (False, True, nan, inf), the
    module-level ``functions`` and the ``'__globals__'`` entry of *context*
    (if any). The assembled namespace is stored back in
    ``context['__globals__']``, i.e. *context* is modified in-place.

    :param s: the expression to parse (passed as-is to ``_parse``).
    :param context: dict-like object; must support ``get`` and item
                    assignment.
    :param interactive: forwarded to ``_parse``.
    :return: the value returned by ``to_ast(node, context)``.
    :raises Exception: any exception raised by ``_parse`` or ``to_ast`` is
                       passed to ``add_context`` (with the offending input)
                       and then re-raised.
    """
    globals_context = {
        'False': False,
        'True': True,
        'nan': float('nan'),
        'inf': float('inf'),
    }
    globals_context.update(functions)
    globals_context.update(context.get('__globals__', {}))
    # modify in-place
    context['__globals__'] = globals_context
    try:
        node = _parse(s, interactive=interactive)
        return to_ast(node, context)
    except Exception as e:
        # use %-formatting rather than "+": concatenation raises a TypeError
        # when s is not a string, which would hide the original exception
        add_context(e, "while parsing: %s" % (s,))
        # bare raise so that the original traceback is kept
        raise
def expr_eval(expr, context):
    """Evaluate *expr* in *context*, recursing into simple containers.

    * ``Expr`` instances: every variable returned by
      ``expr.collect_variables()`` must either be a known global name (a key
      of ``context.global_tables`` or a field of its ``'periodic'`` table) or
      be present in *context*; the expression is then evaluated with
      ``expr.evaluate(context)``.
    * lists and tuples: each element is evaluated and a container of the
      same kind is returned.
    * slices: start, stop and step are evaluated.
    * anything else is returned unchanged.

    :raises Exception: if an expression uses an unknown variable. Whatever
                       the exception raised during evaluation, it is passed
                       to ``add_context`` (with ``str(expr)``) and re-raised.
    """
    try:
        if isinstance(expr, Expr):
            # assert isinstance(expr.__fields__, tuple)

            globals_data = context.global_tables
            if globals_data is not None:
                globals_names = set(globals_data.keys())
                if 'periodic' in globals_data:
                    periodic_fields = globals_data['periodic'].dtype.names
                    globals_names |= set(periodic_fields)
            else:
                globals_names = set()

            # FIXME: systematically checking for the presence of variables
            # has a non-negligible cost (especially in matching), even when
            # caching collect_variables result (it is much better than
            # before though).
            # TODO: also check for globals
            for var in expr.collect_variables():
                if var.name not in globals_names and var not in context:
                    raise Exception("variable '%s' is unknown (it is either "
                                    "not defined or not computed yet)" % var)
            return expr.evaluate(context)

            # there are several flaws with this approach:
            # 1) I don't get action times (csv et al)
            # 2) these are cumulative times (they include child
            #    expr/processes) we might want to store the timings in a
            #    tree (based on call stack depth???) so that I could rebuild
            #    both cumulative and "real" timings.
            # 3) the sum of timings is wrong since children/nested expr
            #    times count both for themselves and for all their parents
            # time, res = gettime(expr.evaluate, context)
            # timings[expr.__class__.__name__] += time
            # return res
        elif isinstance(expr, list):
            return [expr_eval(item, context) for item in expr]
        elif isinstance(expr, tuple):
            return tuple([expr_eval(item, context) for item in expr])
        elif isinstance(expr, slice):
            return slice(expr_eval(expr.start, context),
                         expr_eval(expr.stop, context),
                         expr_eval(expr.step, context))
        else:
            return expr
    except Exception as e:
        add_context(e, "when evaluating: " + str(expr))
        # bare raise so that the original traceback is kept
        raise
def evaluate(self, context):
    """Evaluate this expression in *context* and return the result.

    The expression is first simplified with ``self.as_simple_expr``. If the
    result is a single ``Variable`` present in *context*, its value is
    returned directly. Otherwise the simplified expression is converted to
    a string and handed to the module-level ``evaluate`` function (numexpr,
    according to the comments below).

    When *context* is an ``EntityContext`` for an array period and some of
    the variables are ``LabeledArray``, the result is wrapped in a
    ``LabeledArray`` using their (common) dimension names and axis values.

    :raises Exception: if several LabeledArray with different dimension
                       names or axis values are used in the expression.
    :raises KeyError: raised during evaluation; the exception goes through
                      ``add_context`` (with the expression string) first.
    """
    # FIXME: this cannot work, because dict.__contains__(k) calls k.__eq__
    # which has a non standard meaning
    # if self in expr_cache:
    #     s = expr_cache[self]
    # else:
    #     s = self.as_string(context)
    #     expr_cache[self] = s

    simple_expr = self.as_simple_expr(context)
    if isinstance(simple_expr, Variable) and simple_expr.name in context:
        return context[simple_expr.name]

    # check for labeled arrays, to work around the fact that numexpr
    # does not preserve ndarray subclasses.

    # avoid checking for arrays types in the past, because that is a
    # costly operation (context[var_name] fetches the column from disk
    # in that case). This probably prevents us from doing stuff like
    # align(lag(groupby() / groupby())), but it is a limitation I can
    # live with to avoid hitting the disk twice for each disk access.

    # TODO: I should rewrite this whole mess when my "dtype" method
    # supports ndarrays and LabeledArray so that I can get the dtype from
    # the expression instead of from actual values.
    labels = None
    if isinstance(context, EntityContext) and context._is_array_period:
        for var_name in simple_expr.collect_variables(context):
            # var_name should always be in the context at this point
            # because missing temporaries should have been already caught
            # in expr_eval
            value = context[var_name]
            if not isinstance(value, LabeledArray):
                continue

            if labels is None:
                # the first labeled array found is the reference
                labels = (value.dim_names, value.pvalues)
                continue

            if labels[0] != value.dim_names:
                raise Exception('several arrays with inconsistent '
                                'labels (dimension names) in the '
                                'same expression: %s vs %s'
                                % (labels[0], value.dim_names))
            if not np.array_equal(labels[1], value.pvalues):
                raise Exception('several arrays with inconsistent '
                                'axis values in the same '
                                'expression: \n%s\n\nvs\n\n%s'
                                % (labels[1], value.pvalues))

    s = simple_expr.as_string()
    try:
        res = evaluate(s, context, {}, truediv='auto')
        if labels is not None:
            # This is a hack which relies on the fact that currently
            # all the expression we evaluate through numexpr preserve
            # array shapes, but if we ever use numexpr reduction
            # capabilities, we will be in trouble
            res = LabeledArray(res, labels[0], labels[1])
        return res
    except KeyError as e:
        raise add_context(e, s)
def parse(s, globals_dict=None, conditional_context=None, interactive=False,
          autovariables=False):
    """Parse the expression string *s* into a Python AST.

    Non-string input is returned unchanged. For strings, every ``if(`` is
    rewritten to ``where(`` before the text is given to ``ast.parse``.

    NOTE(review): the visible body stops right after the ``ast.parse`` step:
    ``tree`` is not used and none of the keyword arguments is read, so as
    written the function implicitly returns None for string input.

    :raises Exception: any exception raised by ``ast.parse`` is passed to
                       ``add_context`` (with the original string) and the
                       value it returns is raised.
    """
    if not isinstance(s, basestring):
        return s

    # this prevents any function named something ending in "if"
    str_to_parse = s.replace('if(', 'where(')
    try:
        tree = ast.parse(str_to_parse)
    except Exception as e:
        raise add_context(e, s)
def parse(s, globals=None, conditional_context=None, interactive=False,
          autovariables=False):
    """Parse the expression string *s* and compile it to code objects.

    Non-string input is returned unchanged. For strings:

    1. every ``if(`` is rewritten to ``where(``;
    2. the text is parsed with ``ast.parse`` and the tree is transformed by
       ``BoolToBitTransformer``;
    3. the tree, which must consist of exactly one expression statement, is
       compiled in ``'eval'`` mode.

    NOTE(review): the visible body stops after the compilation step:
    ``to_eval`` is not used and none of the keyword arguments is read, so as
    written the function implicitly returns None for string input.

    :raises SyntaxError: left unmodified (from ``ast.parse`` or ``compile``).
    :raises AssertionError: if *s* is not a single expression.
    :raises Exception: any other exception raised by ``compile`` is passed
                       to ``add_context`` (with the original string) and the
                       value it returns is raised.
    """
    if not isinstance(s, basestring):
        return s

    # this prevents any function named something ending in "if"
    str_to_parse = s.replace('if(', 'where(')
    tree = ast.parse(str_to_parse)
    tree = BoolToBitTransformer().visit(tree)
    body = tree.body

    # disable for now because it is not very useful yet. To be useful, I need
    # to implement:
    # * Expr.__setitem__
    # * keep the same context across several expressions in the interactive
    #   console
    # if interactive:
    if False:
        if len(body) == 0:
            to_compile = []
        # if the last statement is an expression, move it out and
        # use eval() on it instead of exec
        elif isinstance(body[-1], ast.Expr):
            to_compile = [('exec', ast.Module(body[:-1])),
                          ('eval', ast.Expression(body[-1].value))]
        else:
            to_compile = [('exec', tree)]
    else:
        assert len(body) == 1 and isinstance(body[0], ast.Expr)
        to_compile = [('eval', ast.Expression(body[0].value))]

    try:
        to_eval = [(mode, compile(code, '<expr>', mode))
                   for mode, code in to_compile]
    except SyntaxError:
        # SyntaxError are clearer if left unmodified since they already
        # contain the faulty string

        # Instances of this class have attributes filename, lineno, offset
        # and text for easier access to the details. str() of the exception
        # instance returns only the message.
        raise
    except Exception as e:
        raise add_context(e, s)
def evaluate(self, context):
    """Evaluate this expression in *context* and return the result.

    The expression is first simplified with ``self.as_simple_expr``. If the
    result is a single ``Variable`` present in *context*, its value is
    returned directly. Otherwise the simplified expression is converted to
    a string and handed, along with ``context.entity_data``, to the
    module-level ``evaluate`` function (numexpr, according to the comments
    below). 0-dimensional array results are converted to scalars.

    When ``context.entity_data`` is an ``EntityContext`` for an array period
    and some of the variables are ``LabeledArray``, the result is wrapped in
    a ``LabeledArray`` using the dimension names and axis values of the
    first one found.

    :param context: must be an ``EvaluationContext`` (asserted).
    :raises Exception: if several LabeledArray with different dimension
                       names or axis values are used in the expression.
    :raises KeyError: raised during evaluation; the exception goes through
                      ``add_context`` (with the expression string) first.
    """
    # period = context.period
    #
    # if isinstance(period, np.ndarray):
    #     assert np.isscalar(period) or not period.shape
    #     period = int(period)
    # cache_key = (self, period, context.entity_name, context.filter_expr)
    # try:
    #     cached_result = expr_cache.get(cache_key, None)
    #     #FIXME: lifecycle functions should invalidate all variables!
    #     if cached_result is not None:
    #         return cached_result
    # except TypeError:
    #     # The cache_key failed to hash properly, so the expr is not
    #     # cacheable. It *should* be because of a not_hashable expr
    #     # somewhere within cache_key[3].
    #     cache_key = None

    simple_expr = self.as_simple_expr(context)
    if isinstance(simple_expr, Variable) and simple_expr.name in context:
        return context[simple_expr.name]

    # check for labeled arrays, to work around the fact that numexpr
    # does not preserve ndarray subclasses.

    # avoid checking for arrays types in the past, because that is a
    # costly operation (context[var_name] fetches the column from disk
    # in that case). This probably prevents us from doing stuff like
    # align(lag(groupby() / groupby())), but it is a limitation I can
    # live with to avoid hitting the disk twice for each disk access.

    # TODO: I should rewrite this whole mess when my "dtype" method
    # supports ndarrays and LabeledArray so that I can get the dtype from
    # the expression instead of from actual values.
    labels = None

    assert isinstance(context, EvaluationContext)
    local_ctx = context.entity_data

    if isinstance(local_ctx, EntityContext) and local_ctx.is_array_period:
        for var in simple_expr.collect_variables():
            assert var.entity is None or var.entity is context.entity, \
                "should not have happened (as_simple_expr should " \
                "have transformed non-local variables)"

            # var_name should always be in the context at this point
            # because missing temporaries should have been already caught
            # in expr_eval
            value = context[var.name]
            # value = local_ctx[var.name]
            if not isinstance(value, LabeledArray):
                continue

            if labels is None:
                # the first labeled array found is the reference
                labels = (value.dim_names, value.pvalues)
                continue

            if labels[0] != value.dim_names:
                raise Exception('several arrays with inconsistent '
                                'labels (dimension names) in the '
                                'same expression: %s vs %s'
                                % (labels[0], value.dim_names))

            # check that for each dimension the labels are the same
            pvalues1, pvalues2 = labels[1], value.pvalues
            # None pvalues are simply ignored. This can happen due
            # to limitations in LabeledArray (should be lifted when
            # we use LArray instead).
            if pvalues1 is not None and pvalues2 is not None:
                for labels1, labels2 in zip(pvalues1, pvalues2):
                    if not np.array_equal(labels1, labels2):
                        raise Exception('several arrays with '
                                        'inconsistent axis values '
                                        'in the same expression: '
                                        '\n%s\n\nvs\n\n%s'
                                        % (labels1, labels2))

    s = simple_expr.as_string()
    try:
        res = evaluate(s, local_ctx, {'nan': float('nan')}, truediv='auto')
        if isinstance(res, np.ndarray) and not res.shape:
            # 0-d array -> Python scalar. ndarray.item() is what the
            # deprecated np.asscalar() called internally.
            res = res.item()
        if labels is not None:
            # This is a hack which relies on the fact that currently
            # all the expression we evaluate through numexpr preserve
            # array shapes, but if we ever use numexpr reduction
            # capabilities, we will be in trouble
            res = LabeledArray(res, labels[0], labels[1])

        # if cache_key is not None:
        #     expr_cache[cache_key] = res
        #     if cached_result is not None:
        #         assert np.array_equal(res, cached_result), \
        #             "%s != %s" % (res, cached_result)
        return res
    except KeyError as e:
        # no debugger breakpoint here: a leftover pdb.set_trace() would
        # freeze any non-interactive run on the first KeyError
        raise add_context(e, s)
# if the last statement is an expression, move it out and # use eval() on it instead of exec if isinstance(body[-1], ast.Expr): to_compile = [('exec', ast.Module(body[:-1])), ('eval', ast.Expression(body[-1].value))] else: to_compile = [('exec', tree)] else: assert len(body) == 1 and isinstance(body[0], ast.Expr) to_compile = [('eval', ast.Expression(body[0].value))] try: to_eval = [(mode, compile(code, '<expr>', mode)) for mode, code in to_compile] except Exception, e: raise add_context(e, s) context = {'False': False, 'True': True, 'nan': float('nan')} if autovariables: for _, code in to_eval: varnames = code.co_names context.update((name, Variable(name)) for name in varnames) # context.update((name, Token(name)) for name in varnames) #FIXME: this whole conditional context feature is a huge hack. # It relies on the link target not having the same fields/links # than the local entity (or not using them). # A collision will only occur rarely but it will make it all the more