def generate_variable_name_tuple(self):
     """Return (package_name, dataset_names, short_name, alias, autogen_class) for my expression.

     If the expression is only a reference to a fully-qualified variable, a
     dataset-qualified variable, or an attribute, and there is either no alias
     or the alias equals the referenced shortname, the tuple names that
     existing variable and both alias and autogen_class are None.  In every
     other case a new variable class is generated and returned along with the
     alias.
     """
     # Hack: whenever there is an alias we generate a new variable class, except when the
     # expression is a plain variable reference whose shortname equals the alias, e.g.
     #       population = urbansim.gridcell.population
     # in which case the alias is simply dropped.  In theory a plain reference with a
     # different alias shouldn't need a new class either, but that would require datasets
     # to keep a dictionary of additional aliases, since the alias is used as the name of
     # the attribute.  (This change should be made when there is a rewrite of Dataset.)
     def alias_is_droppable(bindings):
         return self._alias is None or bindings['shortname'] == self._alias

     tree = self._expr_parsetree
     # fully-qualified variable: package.dataset.shortname
     is_ref, found = match(EXPRESSION_IS_FULLY_QUALIFIED_VARIABLE, tree)
     if is_ref and alias_is_droppable(found):
         datasets = (found['dataset'], )
         return (found['package'], datasets, found['shortname'], None, None)
     # dataset-qualified variable: dataset.shortname
     is_ref, found = match(EXPRESSION_IS_DATASET_QUALIFIED_VARIABLE, tree)
     if is_ref and alias_is_droppable(found):
         datasets = (found['dataset'], )
         return (None, datasets, found['shortname'], None, None)
     # bare attribute: shortname
     is_ref, found = match(EXPRESSION_IS_ATTRIBUTE, tree)
     if is_ref and alias_is_droppable(found):
         return (None, (), found['shortname'], None, None)
     # anything else is a more complex expression and needs its own variable class
     new_short_name, new_class = self._generate_new_variable()
     return (None, self._dataset_names, new_short_name, self._alias, new_class)
 def _get_arguments(self, arg_pattern_names, formals, arg_dict):
     formals_list = list(formals)  # make a copy, since we'll alter this
     # keyword_mode becomes true once we start seeing keywords on the arguments
     keyword_mode = False
     result = {}
     for a in arg_pattern_names:
         if a not in arg_dict:
             # we've gone through all of the actual arguments that were supplied
             return result
         same, vars = match(SUBPATTERN_ARGUMENT, arg_dict[a])
         if not same:
             raise ValueError, 'parse error for arguments'
         if 'part2' in vars:
             # change to keyword mode if necessary (if we're already there, that's ok)
             keyword_mode = True
         elif keyword_mode:
             # we're in keyword mode, but no keyword on this arg
             raise ValueError, 'non-keyword argument found after keyword argument'
         if keyword_mode:
             # get the actual keyword out of part1, and the value out of part2
             kwd_same, kwd_vars = match(SUBPATTERN_NAME_ARG, vars['part1'])
             if not kwd_same:
                 raise ValueError, 'parse error for arguments'
             kwd = kwd_vars['name']
             val = vars['part2']
             if kwd not in formals_list:
                 raise ValueError, 'unknown keyword %s' % kwd
             formals_list.remove(kwd)
             result[kwd] = val
         else:
             kwd = formals_list[0]
             formals_list = formals_list[1:]
             val = vars['part1']
             result[kwd] = val
     return result
 def generate_variable_name_tuple(self):
     """Return (package_name, dataset_names, short_name, alias, autogen_class) for my expression.

     If the expression is only a reference to a fully-qualified variable, a
     dataset-qualified variable, or an attribute, and there is either no alias
     or the alias equals the referenced shortname, the tuple names that
     existing variable and both alias and autogen_class are None.  In every
     other case a new variable class is generated and returned along with the
     alias.
     """
     # Hack: whenever there is an alias we generate a new variable class, except when the
     # expression is a plain variable reference whose shortname equals the alias, e.g.
     #       population = urbansim.gridcell.population
     # in which case the alias is simply dropped.  In theory a plain reference with a
     # different alias shouldn't need a new class either, but that would require datasets
     # to keep a dictionary of additional aliases, since the alias is used as the name of
     # the attribute.  (This change should be made when there is a rewrite of Dataset.)
     def alias_is_droppable(bindings):
         return self._alias is None or bindings['shortname'] == self._alias

     tree = self._expr_parsetree
     # fully-qualified variable: package.dataset.shortname
     is_ref, found = match(EXPRESSION_IS_FULLY_QUALIFIED_VARIABLE, tree)
     if is_ref and alias_is_droppable(found):
         datasets = (found['dataset'], )
         return (found['package'], datasets, found['shortname'], None, None)
     # dataset-qualified variable: dataset.shortname
     is_ref, found = match(EXPRESSION_IS_DATASET_QUALIFIED_VARIABLE, tree)
     if is_ref and alias_is_droppable(found):
         datasets = (found['dataset'], )
         return (None, datasets, found['shortname'], None, None)
     # bare attribute: shortname
     is_ref, found = match(EXPRESSION_IS_ATTRIBUTE, tree)
     if is_ref and alias_is_droppable(found):
         return (None, (), found['shortname'], None, None)
     # anything else is a more complex expression and needs its own variable class
     new_short_name, new_class = self._generate_new_variable()
     return (None, self._dataset_names, new_short_name, self._alias, new_class)
 def _get_arguments(self, arg_pattern_names, formals, arg_dict):
     formals_list = list(formals)  # make a copy, since we'll alter this
     # keyword_mode becomes true once we start seeing keywords on the arguments
     keyword_mode = False
     result = {}
     for a in arg_pattern_names:
         if a not in arg_dict:
             # we've gone through all of the actual arguments that were supplied
             return result
         same, vars = match(SUBPATTERN_ARGUMENT, arg_dict[a])
         if not same:
             raise ValueError, 'parse error for arguments'
         if 'part2' in vars:
             # change to keyword mode if necessary (if we're already there, that's ok)
             keyword_mode = True
         elif keyword_mode:
             # we're in keyword mode, but no keyword on this arg
             raise ValueError, 'non-keyword argument found after keyword argument'
         if keyword_mode:
             # get the actual keyword out of part1, and the value out of part2
             kwd_same, kwd_vars = match(SUBPATTERN_NAME_ARG, vars['part1'])
             if not kwd_same:
                 raise ValueError, 'parse error for arguments'
             kwd = kwd_vars['name']
             val = vars['part2']
             if kwd not in formals_list:
                 raise ValueError, 'unknown keyword %s' % kwd
             formals_list.remove(kwd)
             result[kwd] = val
         else:
             kwd = formals_list[0]
             formals_list = formals_list[1:]
             val = vars['part1']
             result[kwd] = val         
     return result
 def _analyze_tree(self, tree):
     # add the dependents of parse tree 'tree' to 'dependents'
     # base case - if tree isn't a tuple, we're at a leaf -- no dependents in that case
     if type(tree) is not TupleType:
         return
     # if tree matches the fully qualified variable subpattern, then add that variable as the dependent
     same, vars = match(SUBPATTERN_FULLY_QUALIFIED_VARIABLE, tree)
     if same:
         # it's a fully-qualified variable (maybe raised to a power)
         self._dependents.add(
             (vars['package'], vars['dataset'], vars['shortname']))
         return
     same, vars = match(SUBPATTERN_FULLY_QUALIFIED_VARIABLE_WITH_CAST, tree)
     if same:
         # it's a fully-qualified variable with a cast (maybe raised to a power)
         self._dependents.add(
             (vars['package'], vars['dataset'], vars['shortname']))
         return
     same, vars = match(SUBPATTERN_DATASET_QUALIFIED_ATTRIBUTE, tree)
     if same:
         self._dependents.add((None, vars['dataset'], vars['shortname']))
         return
     same, vars = match(SUBPATTERN_DATASET_QUALIFIED_ATTRIBUTE_WITH_CAST,
                        tree)
     if same:
         self._dependents.add((None, vars['dataset'], vars['shortname']))
         return
     same, vars = match(SUBPATTERN_ATTRIBUTE, tree)
     if same:
         if vars['shortname'] not in self._named_constants:
             # it's an attribute (maybe raised to a power)
             self._dependents.add((None, None, vars['shortname']))
         return
     same, vars = match(SUBPATTERN_ATTRIBUTE_WITH_CAST, tree)
     if same:
         self._dependents.add((None, None, vars['shortname']))
         return
     same, vars = match(SUBPATTERN_METHOD_CALL_WITH_ARGS, tree)
     if same:
         self._analyze_method_call(vars['receiver'], vars['method'],
                                   vars['args'])
         return
     same, vars = match(SUBPATTERN_METHOD_CALL_WITH_ARGS_WITH_CAST, tree)
     if same:
         self._analyze_method_call(vars['receiver'], vars['method'],
                                   vars['args'])
         return
     # Check for arguments to a method or function.  Since there may be a variable number
     # of arguments, just check whether the first thing in the tree is the symbol for an
     # argument list.  If so the rest of the tuple is the arguments.
     same, vars = match(SUBPATTERN_ARGLIST, tree[0])
     if same:
         # this is a list of arguments for a method or function
         self._analyze_arguments(tree[1:])
         return
     # otherwise recursively descend through the tuple (ignoring the first element, which is
     # an integer identifying the production in the grammar)
     for sub in tree[1:]:
         self._analyze_tree(sub)
 def _analyze_tree(self, tree):
     # add the dependents of parse tree 'tree' to 'dependents'
     # base case - if tree isn't a tuple, we're at a leaf -- no dependents in that case
     if type(tree) is not TupleType:
         return
     # if tree matches the fully qualified variable subpattern, then add that variable as the dependent
     same, vars = match(SUBPATTERN_FULLY_QUALIFIED_VARIABLE, tree)
     if same:
         # it's a fully-qualified variable (maybe raised to a power)
         self._dependents.add( (vars['package'], vars['dataset'], vars['shortname']) )
         return
     same, vars = match(SUBPATTERN_FULLY_QUALIFIED_VARIABLE_WITH_CAST, tree)
     if same:
         # it's a fully-qualified variable with a cast (maybe raised to a power)
         self._dependents.add( (vars['package'], vars['dataset'], vars['shortname']) )
         return
     same, vars = match(SUBPATTERN_DATASET_QUALIFIED_ATTRIBUTE, tree)
     if same:
         self._dependents.add( (None, vars['dataset'], vars['shortname']) )
         return
     same, vars = match(SUBPATTERN_DATASET_QUALIFIED_ATTRIBUTE_WITH_CAST, tree)
     if same:
         self._dependents.add( (None, vars['dataset'], vars['shortname']) )
         return
     same, vars = match(SUBPATTERN_ATTRIBUTE, tree)
     if same:
         if vars['shortname'] not in self._named_constants:
             # it's an attribute (maybe raised to a power)
             self._dependents.add( (None, None, vars['shortname']) )
         return
     same, vars = match(SUBPATTERN_ATTRIBUTE_WITH_CAST, tree)
     if same:
         self._dependents.add( (None, None, vars['shortname']) )
         return
     same, vars = match(SUBPATTERN_METHOD_CALL_WITH_ARGS, tree)
     if same:
         self._analyze_method_call(vars['receiver'], vars['method'], vars['args'])
         return
     same, vars = match(SUBPATTERN_METHOD_CALL_WITH_ARGS_WITH_CAST, tree)
     if same:
         self._analyze_method_call(vars['receiver'], vars['method'], vars['args'])
         return
     # Check for arguments to a method or function.  Since there may be a variable number
     # of arguments, just check whether the first thing in the tree is the symbol for an
     # argument list.  If so the rest of the tuple is the arguments.
     same, vars = match(SUBPATTERN_ARGLIST, tree[0])
     if same:
         # this is a list of arguments for a method or function
         self._analyze_arguments(tree[1:])
         return
     # otherwise recursively descend through the tuple (ignoring the first element, which is
     # an integer identifying the production in the grammar)
     for sub in tree[1:]:
         self._analyze_tree(sub)
 def _analyze_agent_times_choice_method_call(self, receiver, method, args):
     """Analyze a call to agent_times_choice and register a rewritten argument list.

     receiver, method -- the receiver and method name of the call, as strings
         that are formatted into the replacement call
     args -- the parsetree for the call's arguments

     Adds receiver to self._special_dataset_receivers and stores, in
     self._parsetree_replacements[args], the argument parsetree of a new call
     whose single argument is quote() applied to the matched attribute.

     Raises ValueError if args does not match SUBPATTERN_AGENT_TIMES_CHOICE,
     and StandardError if the generated call unexpectedly fails to match
     FULL_EXPRESSION_METHOD_CALL.
     """
     same, found = match(SUBPATTERN_AGENT_TIMES_CHOICE, args)
     if not same:
         raise ValueError("syntax error for agent_times_choice function call")
     self._special_dataset_receivers.add(receiver)
     # 'call' is a string representing the new agent_times_choice call.  Parse it, extract the args, and then add a replacement to
     # parsetree_replacements for the old args.  We want to replace just the args and not the entire call to agent_times_choice,
     # since the way Python represents parsetrees the whole tree may include astype and exponentiation calls, and it's simpler
     # to just replace the args part.
     call = "%s.%s(%s)" % (receiver, method, quote(found['attribute']))
     (newtree, _) = self._parse_expr(call)
     ok, new_parts = match(FULL_EXPRESSION_METHOD_CALL, newtree)
     if not ok:
         raise StandardError('internal error - problem generating new agent_times_choice expression')
     self._parsetree_replacements[args] = new_parts['args']
 def _analyze_agent_times_choice_method_call(self, receiver, method, args):
     """Analyze a call to agent_times_choice and register a rewritten argument list.

     receiver, method -- the receiver and method name of the call, as strings
         that are formatted into the replacement call
     args -- the parsetree for the call's arguments

     Adds receiver to self._special_dataset_receivers and stores, in
     self._parsetree_replacements[args], the argument parsetree of a new call
     whose single argument is quote() applied to the matched attribute.

     Raises ValueError if args does not match SUBPATTERN_AGENT_TIMES_CHOICE,
     and StandardError if the generated call unexpectedly fails to match
     FULL_EXPRESSION_METHOD_CALL.
     """
     same, found = match(SUBPATTERN_AGENT_TIMES_CHOICE, args)
     if not same:
         raise ValueError("syntax error for agent_times_choice function call")
     self._special_dataset_receivers.add(receiver)
     # 'call' is a string representing the new agent_times_choice call.  Parse it, extract the args, and then add a replacement to
     # parsetree_replacements for the old args.  We want to replace just the args and not the entire call to agent_times_choice,
     # since the way Python represents parsetrees the whole tree may include astype and exponentiation calls, and it's simpler
     # to just replace the args part.
     call = "%s.%s(%s)" % (receiver, method, quote(found['attribute']))
     (newtree, _) = self._parse_expr(call)
     ok, new_parts = match(FULL_EXPRESSION_METHOD_CALL, newtree)
     if not ok:
         raise StandardError('internal error - problem generating new agent_times_choice expression')
     self._parsetree_replacements[args] = new_parts['args']
 def _extract_names(self, tree):
     """Return a list of the names found in 'tree', in traversal order.

     A tree that matches SUBPATTERN_NAME contributes its 'name' binding.  Any
     other tree contributes the names found, recursively, in each of its
     elements after the first.
     """
     is_name, found = match(SUBPATTERN_NAME, tree)
     if is_name:
         return [found['name']]
     # not a name itself: gather the names from every element after the first
     collected = []
     for child in tree[1:]:
         collected += self._extract_names(child)
     return collected
 def _parse_expr(self, expr):
     """Parse the string expr and return the pair (expr_parsetree, alias).

     If expr is just an expression, alias is None.  If expr is an assignment
     v=e, alias is v and expr_parsetree is the parsetree for e.

     A syntax error raised by the parse is not caught here; it is left to the
     regular Python compiler's error handling.  Raises ValueError if the
     parsed text matches neither a single expression nor a single assignment
     (this would happen if expr consists of multiple statements, which parses
     correctly so wouldn't be caught by the Python compiler).
     """
     full_tree = parser.ast2tuple(parser.suite(expr))
     same, found = match(FULL_TREE_EXPRESSION, full_tree)
     if same:
         return (found['expr'], None)
     same, found = match(FULL_TREE_ASSIGNMENT, full_tree)
     if same:
         return (found['expr'], found['alias'])
     raise ValueError("invalid expression (perhaps multiple statements?): " + expr)
 def _extract_names(self, tree):
     """Return a list of the names found in 'tree', in traversal order.

     A tree that matches SUBPATTERN_NAME contributes its 'name' binding.  Any
     other tree contributes the names found, recursively, in each of its
     elements after the first.
     """
     is_name, found = match(SUBPATTERN_NAME, tree)
     if is_name:
         return [found['name']]
     # not a name itself: gather the names from every element after the first
     collected = []
     for child in tree[1:]:
         collected += self._extract_names(child)
     return collected
 def _parse_expr(self, expr):
     """Parse the string expr and return the pair (expr_parsetree, alias).

     If expr is just an expression, alias is None.  If expr is an assignment
     v=e, alias is v and expr_parsetree is the parsetree for e.

     A syntax error raised by the parse is not caught here; it is left to the
     regular Python compiler's error handling.  Raises ValueError if the
     parsed text matches neither a single expression nor a single assignment
     (this would happen if expr consists of multiple statements, which parses
     correctly so wouldn't be caught by the Python compiler).
     """
     full_tree = parser.ast2tuple(parser.suite(expr))
     same, found = match(FULL_TREE_EXPRESSION, full_tree)
     if same:
         return (found['expr'], None)
     same, found = match(FULL_TREE_ASSIGNMENT, full_tree)
     if same:
         return (found['expr'], found['alias'])
     raise ValueError("invalid expression (perhaps multiple statements?): " + expr)
 def _analyze_arguments(self, args):
     same, vars = match(SUBPATTERN_ARGUMENT, args[0])
     if same:
         # if part2 exists then part1 is the corresponding keyword - just discard part1
         # otherwise part1 is the argument itself
         if 'part2' in vars:
             self._analyze_tree(vars['part2'])
         else:
             self._analyze_tree(vars['part1'])
     else:
         raise StandardError, 'internal error - problem analyzing arguments in expression'
     if len(args) > 1:
         # skip the comma (which is args[1]) and analyze the remaining arguments
         # (since this passed the Python parser we know that the tree is syntactically correct - no
         # need to check that the comma is there)
         self._analyze_arguments(args[2:])
 def _analyze_arguments(self, args):
     same, vars = match(SUBPATTERN_ARGUMENT, args[0])
     if same:
         # if part2 exists then part1 is the corresponding keyword - just discard part1
         # otherwise part1 is the argument itself
         if 'part2' in vars:
             self._analyze_tree(vars['part2'])
         else:
             self._analyze_tree(vars['part1'])
     else:
         raise StandardError, 'internal error - problem analyzing arguments in expression'
     if len(args)>1:
         # skip the comma (which is args[1]) and analyze the remaining arguments
         # (since this passed the Python parser we know that the tree is syntactically correct - no 
         # need to check that the comma is there)
         self._analyze_arguments(args[2:])
    def _analyze_aggregation_method_call(self, receiver, method, args):
        """Analyze an aggregation method call and register a rewritten argument list.

        receiver, method -- the receiver and method name of the call, as
            strings that are formatted into the replacement call
        args -- the parsetree for the call's arguments

        The call takes up to three arguments, positional or keyword:
        'aggr_var' (the variable being aggregated, required), 'intermediates'
        (a list of intermediate dataset names) and 'function' (a function
        name).  Adds the tuple (receiver, method, pkg, dataset, attr,
        intermediates, fcn) to self._aggregation_calls and stores, in
        self._parsetree_replacements[args], the argument parsetree of a new
        call that passes those pieces through quote().

        Raises ValueError for malformed arguments, and StandardError if the
        generated call unexpectedly fails to match FULL_EXPRESSION_METHOD_CALL.
        """
        same, found = match(SUBPATTERN_AGGREGATION, args)
        if not same:
            raise ValueError("syntax error for aggregation method call")
        arg_dict = self._get_arguments(
            ('arg1', 'arg2', 'arg3'),
            ('aggr_var', 'intermediates', 'function'), found)
        if 'aggr_var' not in arg_dict:
            raise ValueError("syntax error for aggregation method call (problem with argument for variable being aggregated)")
        aggr_var = arg_dict['aggr_var']
        fq_same, fq = match(SUBPATTERN_FULLY_QUALIFIED_VARIABLE_ARG, aggr_var)
        if fq_same:
            # the aggregated variable is a fully-qualified name
            pkg = fq['package']
            dataset = fq['dataset']
            attr = fq['shortname']
        else:
            dq_same, dq = match(SUBPATTERN_DATASET_QUALIFIED_VARIABLE_ARG,
                                aggr_var)
            if dq_same:
                # the aggregated variable is a dataset-qualified name
                pkg = None
                dataset = dq['dataset']
                attr = dq['shortname']
            else:
                # The thing being aggregated is an expression.  Generate a new autogen variable for that expression,
                # and use the autogen variable in the aggregation call.
                newvar = VariableName(parsetree_to_string(aggr_var))
                pkg = None
                dataset = newvar.get_dataset_name()
                if dataset is None:
                    raise ValueError("syntax error for aggregation method call - could not determine dataset for variable being aggregated")
                attr = newvar.get_short_name()
        if 'intermediates' in arg_dict:
            # make sure that it really is a list
            is_list, _ = match(SUBPATTERN_LIST_ARG, arg_dict['intermediates'])
            if not is_list:
                raise ValueError("syntax error for aggregation method call (list of intermediate datasets not a list?)")
            intermediates = tuple(
                self._extract_names(arg_dict['intermediates']))
        else:
            intermediates = ()
        if 'function' in arg_dict:
            # bind fcn to the name of the function
            is_name, fcn_found = match(SUBPATTERN_NAME_ARG, arg_dict['function'])
            if not is_name:
                raise ValueError("syntax error for aggregation method call (problem with the function argument in the call)")
            fcn = fcn_found['name']
        else:
            # no function given; quote(None) presumably renders as None in the new call - TODO confirm
            fcn = None
        self._aggregation_calls.add(
            (receiver, method, pkg, dataset, attr, intermediates, fcn))
        quoted_intermediates = ', '.join([quote(n) for n in intermediates])
        # 'call' is a string representing the new aggregation call.  Parse it, extract the args, and then add a replacement to
        # parsetree_replacements for the old args.  We want to replace just the args and not the entire call to aggregate,
        # since the way Python represents parsetrees the whole tree may include astype and exponentiation calls, and it's simpler
        # to just replace the args part.
        call = "%s.%s(%s, %s,%s, [%s], %s)" % (
            receiver, method, quote(pkg), quote(dataset), quote(attr),
            quoted_intermediates, quote(fcn))
        (newtree, _) = self._parse_expr(call)
        ok, new_parts = match(FULL_EXPRESSION_METHOD_CALL, newtree)
        if not ok:
            raise StandardError('internal error - problem generating new aggregation expression')
        self._parsetree_replacements[args] = new_parts['args']
    def _analyze_aggregation_method_call(self, receiver, method, args):
        """Analyze an aggregation method call and register a rewritten argument list.

        receiver, method -- the receiver and method name of the call, as
            strings that are formatted into the replacement call
        args -- the parsetree for the call's arguments

        The call takes up to three arguments, positional or keyword:
        'aggr_var' (the variable being aggregated, required), 'intermediates'
        (a list of intermediate dataset names) and 'function' (a function
        name).  Adds the tuple (receiver, method, pkg, dataset, attr,
        intermediates, fcn) to self._aggregation_calls and stores, in
        self._parsetree_replacements[args], the argument parsetree of a new
        call that passes those pieces through quote().

        Raises ValueError for malformed arguments, and StandardError if the
        generated call unexpectedly fails to match FULL_EXPRESSION_METHOD_CALL.
        """
        same, found = match(SUBPATTERN_AGGREGATION, args)
        if not same:
            raise ValueError("syntax error for aggregation method call")
        arg_dict = self._get_arguments(
            ('arg1', 'arg2', 'arg3'),
            ('aggr_var', 'intermediates', 'function'), found)
        if 'aggr_var' not in arg_dict:
            raise ValueError("syntax error for aggregation method call (problem with argument for variable being aggregated)")
        aggr_var = arg_dict['aggr_var']
        fq_same, fq = match(SUBPATTERN_FULLY_QUALIFIED_VARIABLE_ARG, aggr_var)
        if fq_same:
            # the aggregated variable is a fully-qualified name
            pkg = fq['package']
            dataset = fq['dataset']
            attr = fq['shortname']
        else:
            dq_same, dq = match(SUBPATTERN_DATASET_QUALIFIED_VARIABLE_ARG,
                                aggr_var)
            if dq_same:
                # the aggregated variable is a dataset-qualified name
                pkg = None
                dataset = dq['dataset']
                attr = dq['shortname']
            else:
                # The thing being aggregated is an expression.  Generate a new autogen variable for that expression,
                # and use the autogen variable in the aggregation call.
                newvar = VariableName(parsetree_to_string(aggr_var))
                pkg = None
                dataset = newvar.get_dataset_name()
                if dataset is None:
                    raise ValueError("syntax error for aggregation method call - could not determine dataset for variable being aggregated")
                attr = newvar.get_short_name()
        if 'intermediates' in arg_dict:
            # make sure that it really is a list
            is_list, _ = match(SUBPATTERN_LIST_ARG, arg_dict['intermediates'])
            if not is_list:
                raise ValueError("syntax error for aggregation method call (list of intermediate datasets not a list?)")
            intermediates = tuple(
                self._extract_names(arg_dict['intermediates']))
        else:
            intermediates = ()
        if 'function' in arg_dict:
            # bind fcn to the name of the function
            is_name, fcn_found = match(SUBPATTERN_NAME_ARG, arg_dict['function'])
            if not is_name:
                raise ValueError("syntax error for aggregation method call (problem with the function argument in the call)")
            fcn = fcn_found['name']
        else:
            # no function given; quote(None) presumably renders as None in the new call - TODO confirm
            fcn = None
        self._aggregation_calls.add(
            (receiver, method, pkg, dataset, attr, intermediates, fcn))
        quoted_intermediates = ', '.join([quote(n) for n in intermediates])
        # 'call' is a string representing the new aggregation call.  Parse it, extract the args, and then add a replacement to
        # parsetree_replacements for the old args.  We want to replace just the args and not the entire call to aggregate,
        # since the way Python represents parsetrees the whole tree may include astype and exponentiation calls, and it's simpler
        # to just replace the args part.
        call = "%s.%s(%s, %s,%s, [%s], %s)" % (
            receiver, method, quote(pkg), quote(dataset), quote(attr),
            quoted_intermediates, quote(fcn))
        (newtree, _) = self._parse_expr(call)
        ok, new_parts = match(FULL_EXPRESSION_METHOD_CALL, newtree)
        if not ok:
            raise StandardError('internal error - problem generating new aggregation expression')
        self._parsetree_replacements[args] = new_parts['args']