Example #1
0
    def _get_function_transitions(
        self, expression: Union[str, List], expected_type: PredicateType
    ) -> Tuple[List[str], PredicateType, List[PredicateType]]:
        """
        A helper method for ``_get_transitions``.  This gets the transitions for the predicate
        itself in a function call.  For a simple function (e.g., "(add 2 3)") that is a single
        lookup; it lives in its own method because the function position may itself be a nested
        expression (e.g., "((negate add) 2 3)"), which requires recursion.

        Parameters
        ----------
        expression : ``Union[str, List]``
            The function position of a call: either a function name, or a nested list when the
            function is itself produced by a call (a higher-order function).
        expected_type : ``PredicateType``
            The type the whole call is expected to return.  A falsy value disables the check.

        Returns
        -------
        ``Tuple[List[str], PredicateType, List[PredicateType]]``
            ``(transitions, return_type, argument_types)``.  ``transitions`` starts with the
            production that expands the return type into ``[function_type, arg_types...]``,
            followed by the production(s) for the function itself.

        Raises
        ------
        ParsingError
            If the function is unknown, has more than one registered type, is not a string or
            list, does not resolve to a ``FunctionType``, or returns a type that differs from
            ``expected_type``.
        """
        # This first block handles getting the transitions and function type (and some error
        # checking) _just for the function itself_.  If this is a simple function, this is easy; if
        # it's a higher-order function, it involves some recursion.
        if isinstance(expression, list):
            # This is a higher-order function.  TODO(mattg): we'll just ignore type checking on
            # higher-order functions, for now.
            transitions, function_type = self._get_transitions(
                expression, None)
        elif expression in self._functions:
            function_types = self._function_types[expression]
            if len(function_types) != 1:
                raise ParsingError(
                    f"{expression} had multiple types; this is not yet supported for functions"
                )
            function_type = function_types[0]
            transitions = [f'{function_type} -> {expression}']
        elif isinstance(expression, str):
            # Report the whole name; indexing the string would only show its first character.
            raise ParsingError(f"Unrecognized function: {expression}")
        else:
            raise ParsingError(f"Unsupported expression type: {expression}")

        if not isinstance(function_type, FunctionType):
            # Use ``expression`` here so the message is also available on the higher-order path,
            # where no simple function name exists.
            raise ParsingError(
                f'Zero-arg function or constant called with arguments: {expression}'
            )

        # Now that we have the transitions for the function itself, and the function's type, we can
        # get argument types and do the rest of the transitions.
        argument_types = function_type.argument_types
        return_type = function_type.return_type
        right_side = f'[{function_type}, {", ".join(str(arg) for arg in argument_types)}]'
        first_transition = f'{return_type} -> {right_side}'
        transitions.insert(0, first_transition)
        if expected_type and expected_type != return_type:
            raise ParsingError(
                f'{expression} did not have expected type {expected_type} '
                f'(found {return_type})')
        return transitions, return_type, argument_types
Example #2
0
    def action_sequence_to_logical_form(self,
                                        action_sequence: List[str]) -> str:
        """
        Rebuilds a logical form from an action sequence, i.e. from the linearized abstract syntax
        tree that :func:`logical_form_to_action_sequence` produces.

        Raises ``ParsingError`` if the tree cannot be built from the actions, or if actions are
        left over once the tree is complete.
        """
        # The bracketing on the right-hand side of each action is taken to be the bracketing of
        # the logical form (true as long as the sequence contains no currying).  So: build a tree
        # out of the actions, then print its leaves with brackets around the children of every
        # non-terminal node.
        split_actions = [production.split(" -> ") for production in action_sequence]

        # The right-hand side of the first action labels the root of the tree.
        root = Tree(split_actions[0][1], [])

        try:
            leftover = self._construct_node_from_actions(root, split_actions[1:])
        except ParsingError:
            logger.error("Error parsing action sequence: %s", action_sequence)
            raise

        if not leftover:
            return nltk_tree_to_logical_form(root)

        logger.error("Error parsing action sequence: %s", action_sequence)
        logger.error("Remaining actions were: %s", leftover)
        raise ParsingError("Extra actions in action sequence")
Example #3
0
    def logical_form_to_action_sequence(self, logical_form: str) -> List[str]:
        """
        Linearizes the abstract syntax tree of a logical form into its production rules, in
        top-down, depth-first order.

        Every production rule has the shape "LHS -> RHS".  "LHS" is a single non-terminal type;
        "RHS" is either a terminal or a list of non-terminals (our grammar induction logic does
        not produce the other right-hand sides a general context-free grammar could have).

        The non-terminals are the `types` of the grammar: basic types (``int``, ``str``, or a
        class that you define) and functional types.  Functional types are written in angle
        brackets, with commas between argument types and a colon before the return type, so
        ``<int:int>`` takes an integer and returns an integer, and ``<int,int:int>`` takes two
        integers and returns an integer.

        For example, the logical form ``(add 2 3)`` becomes ``['@start@ -> int',
        'int -> [<int,int:int>, int, int]', '<int,int:int> -> add', 'int -> 2', 'int -> 3']``.

        A ``ParsingError`` raised along the way is logged together with the logical form and then
        re-raised.
        """
        nested_expression = util.lisp_to_nested_expression(logical_form)
        try:
            productions, root_type = self._get_transitions(nested_expression,
                                                           expected_type=None)
            allowed_start_types = self._start_types
            if allowed_start_types and root_type not in allowed_start_types:
                raise ParsingError(
                    f"Expression had unallowed start type of {root_type}: {nested_expression}"
                )
        except ParsingError as error:
            logger.error(
                f"Error parsing logical form: {logical_form}: {error}")
            raise
        else:
            # The sequence always opens with the production out of the start symbol.
            productions.insert(0, f"@start@ -> {root_type}")
            return productions
Example #4
0
    def _construct_node_from_actions(
            self, current_node: Tree,
            remaining_actions: List[List[str]]) -> List[List[str]]:
        """
        Fills in the children of ``current_node`` by consuming actions from the front of
        ``remaining_actions``, and returns the actions that were not consumed.

        Say the node has type ``c`` and the next action is ``c -> [<r,c>, r]``.  Two children are
        added to the node, and actions are consumed first for the ``<r,c>`` subtree and then for
        the ``r`` subtree (each recursively, down to its leaves).  This relies on the action
        sequence being `depth-first`: everything under ``<r,c>`` comes before anything under
        ``r``.  Whatever follows those two subtrees in the sequence is handed back to the caller.

        Raises ``ParsingError`` if there is no action left, or if the next action's left-hand
        side is not the label of ``current_node``.
        """
        if not remaining_actions:
            logger.error("No actions left to construct current node: %s",
                         current_node)
            raise ParsingError("Incomplete action sequence")

        lhs, rhs = remaining_actions.pop(0)
        if lhs != current_node.label():
            logger.error("Current node: %s", current_node)
            logger.error("Next action: %s -> %s", lhs, rhs)
            logger.error("Remaining actions were: %s", remaining_actions)
            raise ParsingError("Current node does not match next action")

        if rhs[0] != "[":
            # A pre-terminal: by construction a right-hand side is either a terminal or a list
            # of non-terminals, so this gets exactly one terminal child.  (`append` is how you
            # add a child to an nltk.Tree.)
            current_node.append(Tree(rhs, []))
            return remaining_actions

        # A bracketed list of child types.  Every entry is assumed to be a non-terminal (which
        # should always hold for the way our grammar induction works), so each one is expanded
        # recursively from the actions that follow.
        for child_label in rhs[1:-1].split(", "):
            child = Tree(child_label, [])
            current_node.append(child)
            remaining_actions = self._construct_node_from_actions(
                child, remaining_actions)
        return remaining_actions
Example #5
0
 def _expression_to_action_sequence(self, expression):
     """
     Converts an already-nested expression into its action sequence.

     The productions come from ``self._get_transitions``; a ``@start@ -> <type>`` production
     is then placed in front of them.  When ``self._start_types`` is non-empty, the type of
     the whole expression must be one of them.

     Raises ``ParsingError`` (after logging it together with the expression) if the
     expression cannot be typed or has a start type that is not allowed.
     """
     try:
         transitions, start_type = self._get_transitions(expression,
                                                         expected_type=None)
         if self._start_types and start_type not in self._start_types:
             raise ParsingError(
                 f"Expression had unallowed start type of {start_type}: {expression}"
             )
     except ParsingError as error:
         # Only ``expression`` is in scope in this method; log that, so the original
         # ParsingError propagates instead of being replaced by a NameError.
         logger.error("Error parsing logical form: %s: %s", expression, error)
         raise
     transitions.insert(0, f'@start@ -> {start_type}')
     return transitions
Example #6
0
    def get_logical_form(self,
                         action_sequence: List[str],
                         add_var_function: bool = True) -> str:
        """
        Builds a logical form out of an action sequence, for instance the sequence of actions
        decoded by a transition based semantic parser.

        Parameters
        ----------
        action_sequence : ``List[str]``
            The actions, as strings (eg.: ``['{START_SYMBOL} -> t', 't -> <e,t>', ...]``).
        add_var_function : ``bool`` (optional)
             Some languages mark the use of a variable inside a lambda function with the special
             function ``var`` (eg.: ``(lambda x (fb:row.row.year (var x)))``).  Because of how
             constrained decoding is currently implemented, it is easier for the decoder to leave
             these functions out.  With this flag set, the function is added to the logical form
             even though the action sequence does not contain it.

        Raises
        ------
        ParsingError
            If the tree cannot be built from the actions, or if actions are left over once the
            tree is complete.
        """
        # The bracketing on the right-hand side of each action is taken to be the bracketing of
        # the logical form (true as long as the sequence contains no currying).  So: build a tree
        # out of the actions, then print its leaves with brackets around the children of every
        # non-terminal node.
        split_actions = [production.split(" -> ") for production in action_sequence]

        # The right-hand side of the first action labels the root of the tree.
        root = Tree(split_actions[0][1], [])

        try:
            leftover = self._construct_node_from_actions(
                root, split_actions[1:], add_var_function)
        except ParsingError:
            logger.error("Error parsing action sequence: %s", action_sequence)
            raise

        if not leftover:
            return nltk_tree_to_logical_form(root)

        logger.error("Error parsing action sequence: %s", action_sequence)
        logger.error("Remaining actions were: %s", leftover)
        raise ParsingError("Extra actions in action sequence")
Example #7
0
    def _get_transitions(
            self, expression: Any,
            expected_type: PredicateType) -> Tuple[List[str], PredicateType]:
        """
        Recursively turns a lisp expression into an action sequence; this is the workhorse for
        converting a logical form into actions.  Along the way it checks that each piece has the
        expected type, and for constants with several possible types it uses the expected type
        to choose among them.

        Returns the list of productions together with the type of ``expression``.  Raises
        ``ParsingError`` on an unknown constant, a type mismatch, an ambiguous constant with no
        expected type, a wrong argument count, or an expression that is neither a
        list/tuple nor a string.
        """
        if isinstance(expression, (list, tuple)):
            # A function call: the head gives the function's own productions and its signature,
            # then every argument is translated against the argument type at its position.
            arguments = expression[1:]
            function_transitions, return_type, argument_types = self._get_function_transitions(
                expression[0], expected_type)
            if len(argument_types) != len(arguments):
                raise ParsingError(
                    f"Wrong number of arguments for function in {expression}")
            productions = list(function_transitions)
            for argument_type, argument in zip(argument_types, arguments):
                productions.extend(
                    self._get_transitions(argument, argument_type)[0])
            return productions, return_type

        if not isinstance(expression, str):
            raise ParsingError(
                "Not sure how you got here. Please open an issue on github with details."
            )

        # From here on the expression is a constant.
        if expression not in self._functions:
            raise ParsingError(f"Unrecognized constant: {expression}")
        candidate_types = self._function_types[expression]

        if len(candidate_types) == 1:
            # Only one possible type: it just has to agree with what was expected (if anything).
            only_type = candidate_types[0]
            if expected_type and expected_type != only_type:
                raise ParsingError(
                    f"{expression} did not have expected type {expected_type} "
                    f"(found {only_type})")
            return [f"{only_type} -> {expression}"], only_type

        # Several possible types: the expected type has to pick one of them.
        if not expected_type:
            raise ParsingError(
                "With no expected type and multiple types to pick from "
                f"I don't know what type to use (constant was {expression})"
            )
        if expected_type not in candidate_types:
            raise ParsingError(
                f"{expression} did not have expected type {expected_type} "
                f"(found these options: {candidate_types}; none matched)"
            )
        return [f"{expected_type} -> {expression}"], expected_type
Example #8
0
 def _map_name(self, name: str, keep_mapping: bool = False) -> str:
     """
     Returns the translated form of ``name``.

     Names found in ``types.COMMON_NAME_MAPPING`` or in ``self.local_name_mapping`` are simply
     looked up.  For any other name, a ``ParsingError`` is raised unless ``keep_mapping`` is
     true; in that case a translation is derived from the name's prefix, registered through
     ``self._add_name_mapping`` and returned.
     """
     if name in types.COMMON_NAME_MAPPING:
         return types.COMMON_NAME_MAPPING[name]
     if name in self.local_name_mapping:
         return self.local_name_mapping[name]

     if not keep_mapping:
         raise ParsingError(f"Encountered un-mapped name: {name}")

     if name.startswith("fb:row.row"):
         # Column name: numbered with a running counter.
         translated_name = "C%d" % self._column_counter
         self._column_counter += 1
         name_type = types.COLUMN_TYPE
     elif name.startswith("fb:cell"):
         # Cell name: keep what follows the last ".".
         translated_name = "cell:%s" % name.split(".")[-1]
         name_type = types.CELL_TYPE
     elif name.startswith("fb:part"):
         # Part name: keep what follows the last ".".
         translated_name = "part:%s" % name.split(".")[-1]
         name_type = types.PART_TYPE
     else:
         # Numbers should be the only unmapped names left.  NLTK throws an error on a "." in
         # a constant, which will most likely be a decimal point, so those become
         # underscores.
         number = name.replace(".", "_")
         if re.match("-[0-9_]+", number):
             # A leading "-" would make NLTK read a negative number as a negated expression
             # and force its type to be TRUTH_VALUE (t), so it is swapped out.
             number = number.replace("-", "~")
         translated_name = f"num:{number}"
         name_type = types.NUMBER_TYPE

     self._add_name_mapping(name, translated_name, name_type)
     return translated_name
Example #9
0
    def _construct_node_from_actions(
            self, current_node: Tree, remaining_actions: List[List[str]],
            add_var_function: bool) -> List[List[str]]:
        """
        Fills in the children of ``current_node`` by consuming actions from the front of
        ``remaining_actions``, and returns the actions that were not consumed.

        Say the node has type ``c`` and the next action is ``c -> [<r,c>, r]``.  Two children are
        added to the node, and actions are consumed first for the ``<r,c>`` subtree and then for
        the ``r`` subtree (each recursively, down to its leaves).  This relies on the action
        sequence being `depth-first`: everything under ``<r,c>`` comes before anything under
        ``r``.  Whatever follows those two subtrees in the sequence is handed back to the caller.

        When ``add_var_function`` is true, a terminal that is one of ``self._lambda_variables``
        is wrapped as ``(var <terminal>)``.

        Raises ``ParsingError`` if there is no action left, if the next action does not fit the
        current node, if ``var`` is already present while ``add_var_function`` is true, or if the
        action is a unary non-terminal production.
        """
        if not remaining_actions:
            logger.error("No actions left to construct current node: %s",
                         current_node)
            raise ParsingError("Incomplete action sequence")

        lhs, rhs = remaining_actions.pop(0)
        node_label = current_node.label()
        if lhs != node_label:
            # A different left-hand side is still fine if the multi-match mapping lists it as
            # an alternative for this node's label (compared on string forms).
            alternatives = {
                str(label): [str(option) for option in options]
                for label, options in self.get_multi_match_mapping().items()
            }
            if lhs not in alternatives.get(node_label, ()):
                logger.error("Current node: %s", current_node)
                logger.error("Next action: %s -> %s", lhs, rhs)
                logger.error("Remaining actions were: %s", remaining_actions)
                raise ParsingError("Current node does not match next action")

        if rhs[0] == '[':
            # A bracketed list of children.
            for child_label in rhs[1:-1].split(', '):
                if child_label.startswith("'lambda"):
                    # Lambda shows up in the action sequence as something like `'lambda x'`;
                    # drop the surrounding single quotes.
                    child_label = child_label[1:-1]
                child = Tree(child_label, [])
                current_node.append(child)  # `append` adds a child to an nltk.Tree
                if not self.is_terminal(child_label):
                    remaining_actions = self._construct_node_from_actions(
                        child, remaining_actions, add_var_function)
            return remaining_actions

        if not self.is_terminal(rhs):
            # Only a unary non-terminal production rule gets here.  That is almost certainly not
            # what you want with this kind of grammar, so we crash.  If you really do want
            # this, open a PR with a valid use case.
            raise ParsingError(
                f"Found a unary production rule: {lhs} -> {rhs}. "
                "Are you sure you want a unary production rule in your grammar?"
            )

        # A pre-terminal: it gets one terminal child, possibly wrapped in (var _) first.
        if add_var_function:
            if rhs in self._lambda_variables:
                rhs = f"(var {rhs})"
            if rhs == 'var':
                raise ParsingError(
                    'add_var_function was true, but action sequence already had var'
                )
        current_node.append(Tree(rhs, []))  # `append` adds a child to an nltk.Tree
        return remaining_actions