def _execute_expression(self, expression: Any):
    """
    Recursively executes a single expression of a logical form and returns its value.

    A list is treated as a function application: its first element either names a function
    in ``self._functions`` or is itself a list that evaluates to a function, and the remaining
    elements are evaluated recursively and passed as positional arguments. A string is looked
    up in ``self._functions`` and is either returned as a first-class function or called as a
    constant / zero-argument function (see the comment in the string branch).

    Raises ``ExecutionError`` for an empty list, an unrecognized function or constant, an
    unsupported expression type, or when the applied function raises ``TypeError`` or
    ``ValueError`` (the stack trace is also printed to stderr).
    """
    # pylint: disable=too-many-return-statements
    if isinstance(expression, list):
        if not expression:
            # Without this guard the `expression[0]` lookup below would escape as a bare
            # IndexError instead of the ExecutionError that callers handle.
            raise ExecutionError(f"Unsupported expression type: {expression}")
        head = expression[0]
        if isinstance(head, list):
            function = self._execute_expression(head)
        elif head in self._functions:
            function = self._functions[head]
        elif isinstance(head, str):
            raise ExecutionError(f"Unrecognized function: {head}")
        else:
            raise ExecutionError(f"Unsupported expression type: {expression}")
        arguments = [self._execute_expression(argument) for argument in expression[1:]]
        try:
            return function(*arguments)
        except (TypeError, ValueError) as error:
            traceback.print_exc()
            # Chain the cause so the original failure stays attached to the ExecutionError.
            raise ExecutionError(
                f"Error executing expression {expression} (see stderr for stack trace)"
            ) from error
    if isinstance(expression, str):
        if expression not in self._functions:
            raise ExecutionError(f"Unrecognized constant: {expression}")
        # This is a bit of a quirk in how we represent constants and zero-argument functions.
        # For consistency, constants are wrapped in a zero-argument lambda. So both constants
        # and zero-argument functions are callable in `self._functions`, and are `BasicTypes`
        # in `self._function_types`. For these, we want to return
        # `self._functions[expression]()`, _calling_ the zero-argument function. If we get a
        # `FunctionType` in here, that means we're referring to the function as a first-class
        # object, instead of calling it (maybe as an argument to a higher-order function). In
        # that case, we return the function _without_ calling it.
        # Also, we just check the first function type here, because we assume you haven't
        # registered the same function with both a constant type and a `FunctionType`.
        if isinstance(self._function_types[expression][0], FunctionType):
            return self._functions[expression]
        return self._functions[expression]()
    raise ExecutionError("Not sure how you got here. Please open a github issue with details.")
def __gt__(self, other) -> bool:
    """
    Returns whether this date is strictly later than ``other``.

    A year, month or day of -1 means that the value is unknown. Fields are compared from most
    to least significant, and the comparison is considered undefined (``False``) as soon as it
    reaches a field that is unknown on either side. The one exception is the year: when it is
    -1 on *both* dates we assume it was left out because it is the same, and carry on with the
    month. The comparison is therefore undefined on the year only when exactly one of the two
    year values is -1.

    Raises ``ExecutionError`` if ``other`` is not a ``Date``.
    """
    if not isinstance(other, Date):
        raise ExecutionError("only compare Dates with Dates")
    own_year_unknown = self.year == -1
    other_year_unknown = other.year == -1
    if own_year_unknown != other_year_unknown:
        # Exactly one year is unknown: comparison undefined.
        return False
    if self.year != other.year:
        return self.year > other.year
    # From here on the years are equal (possibly both unknown).
    for field in ("month", "day"):
        own_value = getattr(self, field)
        other_value = getattr(other, field)
        if own_value == -1 or other_value == -1:
            return False
        if own_value != other_value:
            return own_value > other_value
    # Every field is known and equal, so this date is not strictly later.
    return False
def __eq__(self, other) -> bool:
    """
    Returns whether this date matches ``other``, treating a field of -1 on either side as a
    wildcard for that field.

    Note that this makes equality non-transitive. That is,
    Date(2018, -1, -1) == Date(2018, 2, 3) and Date(2018, -1, -1) == Date(2018, 4, 5),
    but Date(2018, 2, 3) != Date(2018, 4, 5).

    Raises ``ExecutionError`` if ``other`` is not a ``Date``.
    """
    if not isinstance(other, Date):
        raise ExecutionError("only compare Dates with Dates")
    field_pairs = (
        (self.year, other.year),
        (self.month, other.month),
        (self.day, other.day),
    )
    return all(mine == -1 or theirs == -1 or mine == theirs
               for mine, theirs in field_pairs)
def min_number(self, rows: List[Row], column: NumberColumn) -> Number:
    """
    Returns the smallest value found under ``column`` in ``rows``, or 0.0 when ``rows`` is
    empty. Raises ``ExecutionError`` if any of the values is not a ``Number``.
    """
    collected = []
    for row in rows:
        collected.append(row.values[column.name])
    if not collected:
        return 0.0  # type: ignore
    if any(not isinstance(entry, Number) for entry in collected):
        raise ExecutionError(f"Invalid values for number selection function: {collected}")
    return min(collected)  # type: ignore
def min_date(self, rows: List[Row], column: DateColumn) -> Date:
    """
    Returns the smallest value found under ``column`` in ``rows``, or ``Date(-1, -1, -1)``
    when ``rows`` is empty. Raises ``ExecutionError`` if any of the values is not a ``Date``.
    """
    collected = []
    for row in rows:
        collected.append(row.values[column.name])
    if not collected:
        return Date(-1, -1, -1)
    if any(not isinstance(entry, Date) for entry in collected):
        raise ExecutionError(f"Invalid values for date selection function: {collected}")
    return min(collected)  # type: ignore
def mode_string(self, rows: List[Row], column: StringColumn) -> List[str]:
    """
    Returns the most frequent values (one or more) under ``column`` in ``rows``, as reported
    by ``self._get_most_frequent_values``. An empty result is returned as a fresh empty list.
    Raises ``ExecutionError`` if any of the values is not a string.
    """
    candidates = self._get_most_frequent_values(rows, column)
    if not candidates:
        return []
    if any(not isinstance(candidate, str) for candidate in candidates):
        raise ExecutionError(f"Invalid values for mode_string: {candidates}")
    return candidates
def mode_date(self, rows: List[Row], column: DateColumn) -> Date:
    """
    Returns the most frequent value under ``column`` in ``rows``: the first entry of the list
    produced by ``self._get_most_frequent_values``. When that list is empty,
    ``Date(-1, -1, -1)`` is returned. Raises ``ExecutionError`` if the selected value is not
    a ``Date``.
    """
    most_frequent_list = self._get_most_frequent_values(rows, column)
    if not most_frequent_list:
        return Date(-1, -1, -1)
    # Only the first of the most frequent values is reported.
    most_frequent_value = most_frequent_list[0]
    if not isinstance(most_frequent_value, Date):
        raise ExecutionError(f"Invalid value for mode_date: {most_frequent_value}")
    return most_frequent_value
def mode_number(self, rows: List[Row], column: NumberColumn) -> Number:
    """
    Returns the most frequent value under ``column`` in ``rows``: the first entry of the list
    produced by ``self._get_most_frequent_values``. When that list is empty, 0.0 is returned.
    Raises ``ExecutionError`` if the selected value is not a ``Number``.
    """
    most_frequent_list = self._get_most_frequent_values(rows, column)
    if not most_frequent_list:
        return 0.0  # type: ignore
    # Only the first of the most frequent values is reported.
    most_frequent_value = most_frequent_list[0]
    if not isinstance(most_frequent_value, Number):
        raise ExecutionError(f"Invalid value for mode_number: {most_frequent_value}")
    return most_frequent_value
def diff(self, first_row: List[Row], second_row: List[Row], column: NumberColumn) -> Number:
    """
    Takes two row lists and a number column and returns the value under that column in the
    first row of ``first_row`` minus the value under it in the first row of ``second_row``.
    Returns 0.0 if either list is empty, and raises ``ExecutionError`` unless both values are
    floats.
    """
    if not (first_row and second_row):
        return 0.0  # type: ignore
    minuend = first_row[0].values[column.name]
    subtrahend = second_row[0].values[column.name]
    if not (isinstance(minuend, float) and isinstance(subtrahend, float)):
        raise ExecutionError(f"Invalid column for diff: {column.name}")
    return minuend - subtrahend  # type: ignore
def filter_not_in(self, rows: List[Row], column: StringColumn, filter_values: List[str]) -> List[Row]:
    """
    Returns the rows whose value under ``column`` is a string that does not contain the
    filter value.

    ``filter_values`` may be a single string or a list of strings; when a list is passed,
    only its first element is used. A leading "string:" on the filter value is removed before
    matching. Raises ``ExecutionError`` if ``filter_values`` is empty or is neither a string
    nor a list.
    """
    # We accept a list of filter values instead of a single string to allow the outputs of
    # select-like operations to be passed in as filter values.
    # Assuming filter value has underscores for spaces. The cell values also have underscores
    # for spaces, so we do not need to replace them here.
    if not filter_values:
        raise ExecutionError(f"Unexpected filter value: {filter_values}")
    if isinstance(filter_values, str):
        filter_value = filter_values
    elif isinstance(filter_values, list):
        filter_value = filter_values[0]
    else:
        raise ExecutionError(f"Unexpected filter value: {filter_values}")
    # Also, we need to remove the "string:" that was prepended to the entity name in the
    # language. This has to be a prefix check: `str.lstrip` takes a *set of characters*, so
    # `lstrip('string:')` would also eat the start of the entity name itself
    # (e.g. "string:singapore" -> "apore").
    prefix = "string:"
    if filter_value.startswith(prefix):
        filter_value = filter_value[len(prefix):]
    filtered_rows: List[Row] = []
    for row in rows:
        cell_value = row.values[column.name]
        if isinstance(cell_value, str) and filter_value not in cell_value:
            filtered_rows.append(row)
    return filtered_rows
def execute_action_sequence(self, action_sequence: List[str], side_arguments: List[Dict] = None):
    """
    Executes the program defined by an action sequence directly, without needing the overhead
    of translating to a logical form first. For any given program, :func:`execute` and this
    function are equivalent, they just take different representations of the program, so you
    can use whichever is more efficient.

    Also, if you have state or side arguments associated with particular production rules
    (e.g., the decoder's attention on an input utterance when a predicate was predicted), you
    `must` use this function to execute the logical form, instead of :func:`execute`, so that
    we can match the side arguments with the right functions.

    ``side_arguments``, when given, is sliced in step with ``action_sequence``, so its first
    entry is dropped together with the start action.

    Raises ``ExecutionError`` if the action sequence is empty or does not begin with an
    ``@start@`` production.
    """
    if not action_sequence:
        # Report an empty program the same way as any other malformed sequence, rather than
        # letting the `action_sequence[0]` lookup below escape as an IndexError.
        raise ExecutionError('invalid action sequence')
    # We'll strip off the first action, because it doesn't matter for execution.
    first_action = action_sequence[0]
    left_side = first_action.split(' -> ')[0]
    if left_side != '@start@':
        raise ExecutionError('invalid action sequence')
    remaining_actions = action_sequence[1:]
    remaining_side_args = side_arguments[1:] if side_arguments else None
    # `_execute_sequence` also returns the unconsumed actions and side arguments; only the
    # execution result is of interest here.
    return self._execute_sequence(remaining_actions, remaining_side_args)[0]
def _execute_sequence(self,
                      action_sequence: List[str],
                      side_arguments: List[Dict]) -> Tuple[Any, List[str], List[Dict]]:
    """
    This does the bulk of the work of :func:`execute_action_sequence`, recursively executing
    the functions it finds and trimming actions off of the action sequence.

    The return value is a tuple of ``(execution, remaining_actions, remaining_side_args)``,
    where the last two values are necessary to handle the recursion. ``side_arguments`` may
    be ``None`` or empty, in which case ``remaining_side_args`` is ``None``.

    Raises ``ExecutionError`` if the action sequence is empty, if the first action has no
    `` -> `` separator, or if the entry registered for a terminal is not callable.
    """
    if not action_sequence:
        raise ExecutionError("invalid action sequence")
    first_action = action_sequence[0]
    remaining_actions = action_sequence[1:]
    remaining_side_args = side_arguments[1:] if side_arguments else None
    action_parts = first_action.split(" -> ")
    if len(action_parts) < 2:
        # A production without a right-hand side would otherwise surface as an IndexError.
        raise ExecutionError("invalid action sequence")
    right_side = action_parts[1]
    if right_side in self._functions:
        function = self._functions[right_side]
        if not callable(function):
            # Every branch below needs something it can call or return as a function; fail
            # explicitly instead of falling through with no execution value.
            raise ExecutionError(f"Registered value is not callable: {right_side}")
        function_arguments = inspect.signature(function).parameters
        if not function_arguments:
            # This was a zero-argument function / constant that was registered as a lambda
            # function, for consistency of execution in `execute()`.
            execution_value = function()
        elif side_arguments:
            current_side_args = side_arguments[0]
            kwargs = {name: current_side_args[name]
                      for name in function_arguments
                      if name in current_side_args}
            # Parameter names are unique, so any parameter not bound from the side arguments
            # has to come from the logical form.
            has_logical_form_arguments = len(kwargs) < len(function_arguments)
            if kwargs and has_logical_form_arguments:
                # This is a function that has both side arguments and logical form
                # arguments - we curry the function so only the logical form arguments are
                # left.
                def curried_function(*args):
                    return function(*args, **kwargs)
                execution_value = curried_function
            elif kwargs:
                # This is a function that _only_ has side arguments - we just call the
                # function and return a value.
                execution_value = function(**kwargs)
            else:
                # This is a function that has logical form arguments, but no side arguments
                # that match what we were given - just return the function itself.
                execution_value = function
        else:
            execution_value = function
        return execution_value, remaining_actions, remaining_side_args
    # This is a non-terminal expansion, like 'int -> [<int:int>, int, int]'. We need to
    # get the function and its arguments, then call the function with its arguments.
    # Because we linearize the abstract syntax tree depth first, left-to-right, we can just
    # recursively call `_execute_sequence` for the function and all of its arguments, and
    # things will just work.
    right_side_parts = right_side.split(", ")
    # We don't really need to know what the types are, just how many of them there are, so
    # we recurse the right number of times.
    function, remaining_actions, remaining_side_args = self._execute_sequence(
        remaining_actions, remaining_side_args)
    arguments = []
    for _ in right_side_parts[1:]:
        argument, remaining_actions, remaining_side_args = self._execute_sequence(
            remaining_actions, remaining_side_args)
        arguments.append(argument)
    return function(*arguments), remaining_actions, remaining_side_args
def __ge__(self, other) -> bool:
    """
    Returns whether this date is later than or equal to ``other``, as decided by this class's
    ``>`` and ``==`` operators (``==`` is only consulted when ``>`` does not hold).

    Raises ``ExecutionError`` if ``other`` is not a ``Date``.
    """
    if not isinstance(other, Date):
        raise ExecutionError("only compare Dates with Dates")
    is_greater = self > other
    if is_greater:
        return is_greater
    return self == other