Example #1
0
def build_sign_from_node(element):
    """
    Given the XML element of a CCG sign, builds the internal
    representation.

    @param element: DOM element of the sign entry
    @return: a Sign pairing the grammatical category with its semantics
    @raise GrammarReadError: if the entry has no logical form, or the
        logical form could not be interpreted

    """
    # Assume there's only one child we're interested in (the category)
    child_elements = remove_unwanted_elements(element.childNodes)
    cat_elem = child_elements[0]
    # Get the category structure from the category node
    gramcategory = build_category_from_node(cat_elem)

    lf_elems = element.getElementsByTagName("lf")
    if len(lf_elems) == 0 or \
            len(remove_unwanted_elements(lf_elems[0].childNodes)) == 0:
        # Call-form raise: the old "raise Exc, msg" statement syntax is
        #  invalid in Python 3
        raise GrammarReadError("No logical form found for entry: "
                "%s. What is syntax without semantics?" % cat_elem.toxml())
        # Steedman, 2010 (private correspondence)
    lf_elem = lf_elems[0]
    lf_children = remove_unwanted_elements(lf_elem.childNodes)
    # Get semantics from the lf node
    sems = build_lf_from_node(lf_children[0])
    if sems is None:
        raise GrammarReadError("Could not build semantic "
                "representation for %s." % lf_elem.toxml())
    sems = Semantics(sems)

    # Store the full category for this entry
    return Sign(gramcategory, sems)
Example #2
0
def build_sign_from_node(element):
    """
    Given the XML element of a CCG sign, builds the internal
    representation.

    @param element: DOM element of the sign entry
    @return: a Sign pairing the grammatical category with its semantics
    @raise GrammarReadError: if the entry has no logical form, or the
        logical form could not be interpreted

    """
    # Assume there's only one child we're interested in (the category)
    child_elements = remove_unwanted_elements(element.childNodes)
    cat_elem = child_elements[0]
    # Get the category structure from the category node
    gramcategory = build_category_from_node(cat_elem)

    lf_elems = element.getElementsByTagName("lf")
    if len(lf_elems) == 0 or len(
            remove_unwanted_elements(lf_elems[0].childNodes)) == 0:
        # Call-form raise: the old "raise Exc, msg" statement syntax is
        #  invalid in Python 3
        raise GrammarReadError("No logical form found for entry: "
                "%s. What is syntax without semantics?" % cat_elem.toxml())
        # Steedman, 2010 (private correspondence)
    lf_elem = lf_elems[0]
    lf_children = remove_unwanted_elements(lf_elem.childNodes)
    # Get semantics from the lf node
    sems = build_lf_from_node(lf_children[0])
    if sems is None:
        raise GrammarReadError("Could not build semantic "
                "representation for %s." % lf_elem.toxml())
    sems = Semantics(sems)

    # Store the full category for this entry
    return Sign(gramcategory, sems)
Example #3
0
def build_lf_from_node(elem):
    """
    Given the "lf" node of a lexical entry, builds a logical form
    representing it internally.

    Dispatches on the node's tag name: point, list, leftonto, rightonto,
    now, abstraction, application and variable are recognized.

    @param elem: DOM node to interpret
    @return: a LogicalForm built from the node
    @raise GrammarReadError: if the node is malformed or has an
        unrecognized tag name

    """
    name = elem.nodeName
    if name == "point":
        # A point in the (as yet equally tempered) tonal space,
        #  relative to the chord of the chord
        x, y = require_attrs(elem, ["x", "y"])
        x, y = int(x), int(y)

        if not 0 <= x < 4 or not 0 <= y < 3:
            # Call-form raise: the old "raise Exc, msg" statement syntax
            #  is invalid in Python 3
            raise GrammarReadError("equal temperament tonal space "
                "points should be between (0,0) and (3,2): got (%d,%d)"
                    % (x,y))
        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError("A tonal space point cannot have children.")

        return LexicalCoordinate((x, y))
    elif name == "list":
        # A path of points (usually just one point)
        subnodes = remove_unwanted_elements(elem.childNodes)

        children = [build_lf_from_node(node) for node in subnodes]
        return List(children)
    elif name == "leftonto":
        # A leftonto predicate literal

        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError("A leftonto predicate cannot "
                    "have children.")

        return Leftonto()
    elif name == "rightonto":
        # A rightonto predicate literal

        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError("A rightonto predicate cannot "
                    "have children.")

        return Rightonto()
    elif name == "now":
        # A now predicate literal

        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError("A now predicate cannot "
                    "have children.")

        return Now()
    elif name == "abstraction":
        # Lambda abstraction
        # All children except the last are abstracted variables
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) < 2:
            raise GrammarReadError("No subexpression in lambda "
                    "abstraction: %s" % elem.toxml())
        variables = [build_lf_from_node(node) for node in subnodes[:-1]]
        for var in variables:
            if not isinstance(var, Variable):
                raise GrammarReadError("Can only abstract over "
                    "variables, not %s" % type(var).__name__)
        expression = build_lf_from_node(subnodes[-1])

        return multi_abstract(*tuple(variables + [expression]))
    elif name == "application":
        # Function application
        # Recursively build functor and argument LFs
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) < 2:
            raise GrammarReadError("Function application needs to "
                    "have at least two subnodes")
        children = [build_lf_from_node(node) for node in subnodes]

        return multi_apply(*children)
    elif name == "variable":
        # Variable reference
        varid = require_attr(elem, "name")

        return Variable(varid)
    else:
        raise GrammarReadError("Got invalid node %s in LF" % name)
Example #4
0
def build_lf_from_node(elem):
    """
    Given the "lf" node of a lexical entry, builds a logical form
    representing it internally.

    Dispatches on the node's tag name: point, list, leftonto, rightonto,
    now, abstraction, application and variable are recognized.

    @param elem: DOM node to interpret
    @return: a LogicalForm built from the node
    @raise GrammarReadError: if the node is malformed or has an
        unrecognized tag name

    """
    name = elem.nodeName
    if name == "point":
        # A point in the (as yet equally tempered) tonal space,
        #  relative to the chord of the chord
        x, y = require_attrs(elem, ["x", "y"])
        x, y = int(x), int(y)

        if not 0 <= x < 4 or not 0 <= y < 3:
            # Call-form raise: the old "raise Exc, msg" statement syntax
            #  is invalid in Python 3
            raise GrammarReadError("equal temperament tonal space "
                "points should be between (0,0) and (3,2): got (%d,%d)"
                    % (x,y))
        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError("A tonal space point cannot have children.")

        return LexicalCoordinate((x, y))
    elif name == "list":
        # A path of points (usually just one point)
        subnodes = remove_unwanted_elements(elem.childNodes)

        children = [build_lf_from_node(node) for node in subnodes]
        return List(children)
    elif name == "leftonto":
        # A leftonto predicate literal

        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError("A leftonto predicate cannot "
                    "have children.")

        return Leftonto()
    elif name == "rightonto":
        # A rightonto predicate literal

        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError("A rightonto predicate cannot "
                    "have children.")

        return Rightonto()
    elif name == "now":
        # A now predicate literal

        # Shouldn't be any children
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) != 0:
            raise GrammarReadError("A now predicate cannot "
                    "have children.")

        return Now()
    elif name == "abstraction":
        # Lambda abstraction
        # All children except the last are abstracted variables
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) < 2:
            raise GrammarReadError("No subexpression in lambda "
                    "abstraction: %s" % elem.toxml())
        variables = [build_lf_from_node(node) for node in subnodes[:-1]]
        for var in variables:
            if not isinstance(var, Variable):
                raise GrammarReadError("Can only abstract over "
                    "variables, not %s" % type(var).__name__)
        expression = build_lf_from_node(subnodes[-1])

        return multi_abstract(*tuple(variables + [expression]))
    elif name == "application":
        # Function application
        # Recursively build functor and argument LFs
        subnodes = remove_unwanted_elements(elem.childNodes)
        if len(subnodes) < 2:
            raise GrammarReadError("Function application needs to "
                    "have at least two subnodes")
        children = [build_lf_from_node(node) for node in subnodes]

        return multi_apply(*children)
    elif name == "variable":
        # Variable reference
        varid = require_attr(elem, "name")

        return Variable(varid)
    else:
        raise GrammarReadError("Got invalid node %s in LF" % name)
Example #5
0
    def __init__(self, grammar_name=None):
        """
        Creates a new grammar by reading from an XML grammar file.

        Words (morph items) are stored in morph_items.
        Families (lexical families) are stored in families.

        Instantiate this directly only if you want, for some reason, to be sure
        of getting a new instance of Grammar. Most of the time, you can
        load a named grammar using L{get_grammar}, which will cache already
        loaded grammars and return the same instance again if you ask for the
        same name.

        @type grammar_name: string
        @param grammar_name: name of the grammar definition to be loaded.
            Call L{get_grammar_names} for a list of available grammars. If
            None, loads the default grammar.
        @raise GrammarReadError: if any part of the grammar definition is
            inconsistent (undefined literal function, undefined POS,
            unknown rule, duplicate rule name, non-unary lexical rule)

        """
        if grammar_name is None:
            grammar_name = settings.DEFAULT_GRAMMAR
        self.name = grammar_name

        filename_base = os.path.join(settings.GRAMMAR_DATA_DIR, grammar_name)
        self.grammar_file = os.path.join(filename_base, "grammar.xml")
        # Read in the grammar
        logger.debug("Grammar: %s" % self.grammar_file)

        # Read in the XML from the file
        self.grammar_dom = xml.dom.minidom.parse(self.grammar_file)

        grammar_tag = get_single_element_by_tag_name(self.grammar_dom,
                                                     "grammar")
        # Get a named formalism, or the default one
        formalism_attr = grammar_tag.attributes.getNamedItem("formalism")
        if formalism_attr is None:
            formalism = get_default_formalism()
        else:
            formalism_name = str(formalism_attr.value)
            try:
                formalism = get_formalism(formalism_name)
            except FormalismLoadError:
                logger.error("The formalism '%s' does not exist. Possible "\
                    "formalisms are: %s" % (formalism_name, ", ".join(FORMALISMS)))
                raise
        self.formalism = formalism

        ###############################
        ### Reading in the lexicon
        lex_tag = get_single_element_by_tag_name(self.grammar_dom, "lexicon")
        lexicon_file = os.path.join(
            filename_base,
            lex_tag.attributes.getNamedItem("file").value)
        logger.debug("Lexicon: %s" % lexicon_file)
        # Read in the lexicon
        self.lexicon_dom = xml.dom.minidom.parse(lexicon_file)

        ###############################
        ### Reading in the words
        morph_tag = get_single_element_by_tag_name(self.grammar_dom,
                                                   "morphology")
        morph_file = os.path.join(
            filename_base,
            morph_tag.attributes.getNamedItem("file").value)
        logger.debug("Morphology: %s" % morph_file)
        # Read in the morphology
        self.morph_dom = xml.dom.minidom.parse(morph_file)

        ###############################
        ### Reading in the rules
        rules_tag = get_single_element_by_tag_name(self.grammar_dom, "rules")
        rules_file = os.path.join(
            filename_base,
            rules_tag.attributes.getNamedItem("file").value)
        logger.debug("Rules: %s" % rules_file)
        # Read in the rules
        self.rules_dom = xml.dom.minidom.parse(rules_file)

        ###############################
        ### Reading in the functions list (only used for certain formalisms)
        functions_tag = get_single_element_by_tag_name(self.grammar_dom,
                                                       "functions",
                                                       optional=True)
        self.literal_functions = {}
        available_funs = formalism.literal_functions
        if functions_tag is not None:
            functions_file = os.path.join(
                filename_base,
                functions_tag.attributes.getNamedItem("file").value)
            logger.debug("Functions: %s" % functions_file)
            # Read in the functions from the XML
            functions_dom = xml.dom.minidom.parse(functions_file)
            functions_xml = get_single_element_by_tag_name(
                functions_dom, "functions")
            functions = remove_unwanted_elements(
                functions_xml.getElementsByTagName("function"))
            # Try adding each of the functions, using the formalism's definitions
            for func_el in functions:
                func_name = func_el.attributes.getNamedItem("name").value
                if func_name in available_funs:
                    lit_fun = available_funs[func_name]
                    self.literal_functions[lit_fun.name] = lit_fun
                else:
                    # Fixed: the format string takes two arguments
                    #  (func_name was previously missing, which made this
                    #  raise a TypeError instead of the intended error)
                    raise GrammarReadError(
                        "The literal function \"%s\" is not defined in the "
                        "code for the %s formalism."
                        % (func_name, formalism.get_name()))

        ###############################
        ### Reading in the modality hierarchy
        modalities_tag = get_single_element_by_tag_name(self.grammar_dom,
                                                        "modalities",
                                                        optional=True)
        if modalities_tag is not None:
            modalities_file = os.path.join(
                filename_base,
                modalities_tag.attributes.getNamedItem("file").value)
            logger.debug("Modalities: %s" % modalities_file)
            # Read in the modalities
            self.modalities_dom = get_single_element_by_tag_name(
                xml.dom.minidom.parse(modalities_file), "modalities")
        else:
            self.modalities_dom = None

        ###############################
        ### Read in grammar-level meta data
        attrs = self.grammar_dom.getElementsByTagName("attr")
        # Initialize values that might not get set
        self.max_categories = None
        # Read in the values from the XML
        for el in attrs:
            name = el.getAttribute("name")
            value = el.getAttribute("value")
            # Check for all the attributes we recognize
            if name == "max_categories":
                self.max_categories = int(value)

        ###############################
        ### Prepare the morph word classes
        self.chord_classes = {}
        for entry in self.morph_dom.getElementsByTagName("class"):
            chord_class = ChordClass.from_dom(entry)
            self.chord_classes[chord_class.name] = chord_class

        # Maybe handle macros here. Not currently using them.

        ###############################
        ### Prepare lexical entries
        # Use a hash table for this too, indexed by pos
        self.families = {}
        self.inactive_families = []
        for family in self.lexicon_dom.getElementsByTagName("family"):
            fam = Family.from_dom(formalism, family)
            # Check whether the family has any entries and don't use it if not
            if len(fam.entries) > 0:
                # Put a new Family in the table for every family entry
                if fam.pos in self.families:
                    # Already an entry for this POS: add to the list
                    self.families[fam.pos].append(fam)
                else:
                    # No occurence of this POS yet: add a new list
                    self.families[fam.pos] = [fam]
            else:
                self.inactive_families.append(fam.pos)

        ###############################
        ### Prepare the morph items
        self.morphs = []
        for entry in self.morph_dom.getElementsByTagName("entry"):
            morph = MorphItem.from_dom(formalism, entry, self.chord_classes)
            self.morphs.append(morph)

        # Check that all the morphs correspond to a defined POS
        for morph in self.morphs:
            if morph.pos not in self.families:
                raise GrammarReadError("morph item refers to undefined "
                    "part-of-speech '%s': %s" % (morph.pos, morph.element.toxml()))

        ###############################
        ### Prepare modalities hierarchy
        if self.modalities_dom:
            self.modality_tree = ModalityTree.from_dom(self.modalities_dom)
        else:
            # The modalities that existed before they were added to the
            #  XML spec were just "c" and "."
            self.modality_tree = ModalityTree(
                [ModalityTreeNode("", [ModalityTreeNode("c")])])

        ###############################
        ### Prepare rules
        self.rules = []
        # Go through each different type of rule and add appropriate Rule subclasses
        rule_block = get_single_element_by_tag_name(self.rules_dom, "rules")

        for rule_tag in remove_unwanted_elements(rule_block.childNodes):
            rulename = rule_tag.tagName
            if rulename == "lexrules":
                # We'll deal with these later
                continue
            if rulename not in self.formalism.rules:
                raise GrammarReadError("unknown rule '%s' (formalism "
                    "defines: %s)" % (rulename, ", ".join(formalism.rules.keys())))
            ruleclass = self.formalism.rules[rulename]
            # Instantiate the rule, using options from the XML
            self.rules.append(
                ruleclass(modalities=self.modality_tree,
                          grammar=self,
                          **attrs_to_dict(rule_tag.attributes)))

        # Keep rules sorted by arity for ease of access
        self.unary_rules = []
        self.binary_rules = []
        for rule in self.rules:
            if rule.arity == 1:
                self.unary_rules.append(rule)
            elif rule.arity == 2:
                self.binary_rules.append(rule)

        # Index rules by internal name for ease of access
        self.rules_by_name = {}
        for rule in self.rules:
            if rule.internal_name in self.rules_by_name:
                # This shouldn't happen: each rule name should only be used once
                raise GrammarReadError("instantiated two rules with the same "
                    "internal name: %s. Either the XML has mistakenly "
                    "instantiated the same thing twice, or the rule class has "
                    "failed to give different varieties of the rule different "
                    "names" % rule.internal_name)
            self.rules_by_name[rule.internal_name] = rule

        # Optionally read in a lexrules element and expand the lexicon
        #  using its entries
        self.lexical_rules = []
        lexrules_tag = get_single_element_by_tag_name(self.rules_dom,
                                                      "lexrules",
                                                      optional=True)
        if lexrules_tag is not None:
            for rule_tag in remove_unwanted_elements(lexrules_tag.childNodes):
                rulename = rule_tag.tagName
                if rulename not in self.formalism.rules:
                    raise GrammarReadError("unknown lexical expansion "
                        "rule '%s' (formalism defines: %s)" %
                        (rulename, ", ".join(formalism.rules.keys())))
                ruleclass = self.formalism.rules[rulename]
                attrs = attrs_to_dict(rule_tag.attributes)
                # Make sure expanded category has a suffix to put on
                #  POSs. If one isn't given, set a default.
                if "pos_suffix" in attrs:
                    pos_suffix = attrs["pos_suffix"]
                    del attrs["pos_suffix"]
                else:
                    pos_suffix = "_Rep"
                # Instantiate the rule, using any options given
                rule = ruleclass(modalities=self.modality_tree,
                                 grammar=self,
                                 **attrs)
                rule.pos_suffix = pos_suffix
                # Can only use unary rules - check this one is
                if rule.arity != 1:
                    # Fixed: previously raised a bare string, which is a
                    #  TypeError at runtime, not a catchable grammar error
                    raise GrammarReadError("can only use unary rules as lexical "
                        "expansions. Tried to use %s, which has arity "
                        "%d." % (rulename, rule.arity))
                self.lexical_rules.append(rule)
        # Use each lexical rule to expand the lexicon
        for rule in self.lexical_rules:
            for fam in sum(self.families.values(), []):
                for entry in fam.entries:
                    # Try apply the expansion rule to this entry
                    new_signs = rule.apply_rule([entry.sign])
                    if new_signs is not None and len(new_signs) > 0:
                        # Make a new POS for this expanded category
                        new_pos = "%s%s" % (fam.pos, rule.pos_suffix)
                        new_entries = [EntriesItem(self.formalism, "Expanded", new_sign) \
                                    for new_sign in new_signs]
                        new_family = Family(self.formalism,
                                            new_pos,
                                            new_pos,
                                            new_entries,
                                            chordfn=fam.chordfn,
                                            expanded=rule.internal_name)
                        self.families.setdefault(new_pos,
                                                 []).append(new_family)
                        # Also create morph items for each of those
                        #  that referenced the old unexpanded rules
                        for morph in [
                                m for m in self.morphs if m.pos == fam.pos
                        ]:
                            self.morphs.append(
                                MorphItem(self.formalism,
                                          copy.deepcopy(morph.words),
                                          new_pos,
                                          optional_minor=morph.optional_minor,
                                          chord_class=morph.chord_class))

        ###############
        # Index the morph items by word to make lookup easier
        self.morph_items = {}
        for morph in self.morphs:
            # If the pos is completely inactive in the lexicon, ignore this morph
            if morph.pos not in self.inactive_families:
                # Go through each of this morph's words
                for word in morph.words:
                    # Put a new MorphItem in the table for every entry
                    if word in self.morph_items:
                        # Already a list for this word: add to it
                        self.morph_items[word].append(morph)
                    else:
                        # First occurence of this word: add a new list
                        self.morph_items[word] = [morph]

        ###############
        # Read in an equivalence map if one is given for morph entries
        equiv_map_el = get_single_element_by_tag_name(self.morph_dom,
                                                      "equivmap",
                                                      optional=True)
        if equiv_map_el is not None:
            self.equiv_map = EquivalenceMap.from_dom(formalism, equiv_map_el,
                                                     self.chord_classes,
                                                     self.morphs)
        else:
            self.equiv_map = EquivalenceMap()

        ###########
        # Prepare a version of the family list for MIDI input
        self.midi_families = {}
        for pos, fams in self.families.items():
            new_fams = []
            for fam in fams:
                # Exclude any generated by lexical expansions, unless they're
                #  tonic function
                if fam.expanded is not None and fam.chordfn != "T":
                    continue
                new_fams.append(fam)
            if new_fams:
                # Exclude any that are mapped onto another entry by an equivalence
                #  mapping that changes the root
                if pos in self.equiv_map:
                    continue
                self.midi_families[pos] = new_fams

        ####### Debugging output
        logger.debug("Read the following information from the grammar:")
        logger.debug("Morphology:")
        logger.debug("\n".join(["%s: %s" % (word, ", ".join(["%s" % item.pos for item in items])) \
                         for word,items in self.morph_items.items()]))
        logger.debug("Lexicon:")
        logger.debug("\n".join([", ".join(["%s" % initem for initem in item]) \
                         for item in self.families.values()]))
        logger.debug("Rules:")
        logger.debug("\n".join(["  %s" % item for item in self.rules]))
        logger.debug("Lexical expansion rules:")
        logger.debug("\n".join(["  %s" % item for item in self.lexical_rules]))
        logger.debug("Modalities:")
        logger.debug("%s" % self.modality_tree)
        if len(self.literal_functions):
            logger.debug("Literal functions:")
            logger.debug("\n".join([
                "  %s: %s" % (name, val)
                for (name, val) in self.literal_functions.items()
            ]))
Example #6
0
 def __init__(self, grammar_name=None):
     """ 
     Creates a new grammar by reading from an XML grammar file.
     
     Words (morph items) are stored in morph_items.
     Families (lexical families) are stored in families.
     
     Instantiate this directly only if you want, for some reason, to be sure 
     of getting a new instance of Grammar. Most of the time, you can 
     load a named grammar using L{get_grammar}, which will cache already 
     loaded grammars and return the same instance again if you ask for the 
     same name.
     
     @type grammar_name: string
     @param grammar_name: name of the grammar definition to be loaded. 
         Call L{get_grammar_names} for a list of available grammars. If 
         None, loads the default grammar.
     
     """
     if grammar_name is None:
         grammar_name = settings.DEFAULT_GRAMMAR
     self.name = grammar_name
     
     filename_base = os.path.join(settings.GRAMMAR_DATA_DIR, grammar_name)
     self.grammar_file = os.path.join(filename_base, "grammar.xml")
     # Read in the grammar
     logger.debug("Grammar: %s" % self.grammar_file)
     
     # Read in the XML from the file
     self.grammar_dom = xml.dom.minidom.parse(self.grammar_file)
     
     grammar_tag = get_single_element_by_tag_name(self.grammar_dom, "grammar")
     # Get a named formalism, or the default one
     formalism_attr = grammar_tag.attributes.getNamedItem("formalism")
     if formalism_attr is None:
         formalism = get_default_formalism()
     else:
         formalism_name = str(formalism_attr.value)
         try:
             formalism = get_formalism(formalism_name)
         except FormalismLoadError:
             logger.error("The formalism '%s' does not exist. Possible "\
                 "formalisms are: %s" % (formalism_name, ", ".join(FORMALISMS)))
             raise
     self.formalism = formalism
     
     ###############################
     ### Reading in the lexicon
     lex_tag = get_single_element_by_tag_name(self.grammar_dom, "lexicon")
     lexicon_file = os.path.join(filename_base, lex_tag.attributes.getNamedItem("file").value)
     logger.debug("Lexicon: %s" % lexicon_file)
     # Read in the lexicon
     self.lexicon_dom = xml.dom.minidom.parse(lexicon_file)
     
     ###############################
     ### Reading in the words
     morph_tag = get_single_element_by_tag_name(self.grammar_dom, "morphology")
     morph_file = os.path.join(filename_base, morph_tag.attributes.getNamedItem("file").value)
     logger.debug( "Morphology: %s" % morph_file)
     # Read in the lexicon
     self.morph_dom = xml.dom.minidom.parse(morph_file)
     
     ###############################
     ### Reading in the rules
     rules_tag = get_single_element_by_tag_name(self.grammar_dom, "rules")
     rules_file = os.path.join(filename_base, rules_tag.attributes.getNamedItem("file").value)
     logger.debug( "Rules: %s" % rules_file)
     # Read in the lexicon
     self.rules_dom = xml.dom.minidom.parse(rules_file)
     
     ###############################
     ### Reading in the functions list (only used for certain formalisms)
     functions_tag = get_single_element_by_tag_name(self.grammar_dom, "functions", optional=True)
     self.literal_functions = {}
     available_funs = formalism.literal_functions
     if functions_tag is not None:
         functions_file = os.path.join(filename_base, functions_tag.attributes.getNamedItem("file").value)
         logger.debug( "Functions: %s" % functions_file)
         # Read in the functions from the XML
         functions_dom = xml.dom.minidom.parse(functions_file)
         functions_xml = get_single_element_by_tag_name(functions_dom, "functions")
         functions = remove_unwanted_elements(functions_xml.getElementsByTagName("function"))
         # Try adding each of the functions, using the formalism's definitions
         for func_el in functions:
             func_name = func_el.attributes.getNamedItem("name").value
             if func_name in available_funs:
                 lit_fun = available_funs[func_name]
                 self.literal_functions[lit_fun.name] = lit_fun
             else:
                 raise GrammarReadError, "The literal function \"%s\" is not defined in the code for the %s formalism." % formalism.get_name()
     
     ###############################
     ### Reading in the modality hierarchy
     modalities_tag = get_single_element_by_tag_name(self.grammar_dom, "modalities", optional=True)
     if modalities_tag is not None:
         modalities_file = os.path.join(filename_base, modalities_tag.attributes.getNamedItem("file").value)
         logger.debug( "Modalities: %s" % modalities_file)
         # Read in the modalities
         self.modalities_dom = get_single_element_by_tag_name(xml.dom.minidom.parse(modalities_file), "modalities")
     else:
         self.modalities_dom = None
         
     ###############################
     ### Read in grammar-level meta data
     attrs = self.grammar_dom.getElementsByTagName("attr")
     # Initialize values that might not get set
     self.max_categories = None
     # Read in the values from the XML
     for el in attrs:
         name = el.getAttribute("name")
         value = el.getAttribute("value")
         # Check for all the attributes we recognize
         if name == "max_categories":
             self.max_categories = int(value)
     
     ###############################
     ### Prepare the morph word classes
     self.chord_classes = {}
     for entry in self.morph_dom.getElementsByTagName("class"):
         chord_class = ChordClass.from_dom(entry)
         self.chord_classes[chord_class.name] = chord_class
     
     # Maybe handle macros here. Not currently using them.
     
     ###############################
     ### Prepare lexical entries
     # Use a hash table for this too, indexed by pos
     self.families = {}
     self.inactive_families = []
     for family in self.lexicon_dom.getElementsByTagName("family"):
         fam = Family.from_dom(formalism, family)
         # Check whether the family has any entries and don't use it if not
         if len(fam.entries) > 0:
             # Put a new Family in the table for every family entry
             if fam.pos in self.families:
                 # Already an entry for this POS: add to the list
                 self.families[fam.pos].append(fam)
             else:
                 # No occurence of this POS yet: add a new list
                 self.families[fam.pos] = [fam]
         else:
             self.inactive_families.append(fam.pos)
     
     ###############################
     ### Prepare the morph items
     self.morphs = []
     for entry in self.morph_dom.getElementsByTagName("entry"):
         morph = MorphItem.from_dom(formalism,entry,self.chord_classes)
         self.morphs.append(morph)
     
     # Check that all the morphs correspond to a defined POS
     for morph in self.morphs:
         if morph.pos not in self.families:
             raise GrammarReadError, "morph item refers to undefined "\
                 "part-of-speech '%s': %s" % (morph.pos, morph.element.toxml())
             
     ###############################
     ### Prepare modalities hierarchy
     if self.modalities_dom:
         self.modality_tree = ModalityTree.from_dom(self.modalities_dom)
     else:
         # The modalities that existed before they were added to the 
         #  XML spec were just "c" and "."
         self.modality_tree = ModalityTree([
                                 ModalityTreeNode("", 
                                     [ModalityTreeNode("c")]) ])
         
     ###############################
     ### Prepare rules
     self.rules = []
     # Go through each different type of rule and add appropriate Rule subclasses
     rule_block = get_single_element_by_tag_name(self.rules_dom, "rules")
     
     for rule_tag in remove_unwanted_elements(rule_block.childNodes):
         rulename = rule_tag.tagName
         if rulename == "lexrules":
             # We'll deal with these later
             continue
         if rulename not in self.formalism.rules:
             raise GrammarReadError, "unknown rule '%s' (formalism "\
                 "defines: %s)" % (rulename, ", ".join(formalism.rules.keys()))
         ruleclass = self.formalism.rules[rulename]
         # Instantiate the rule, using options from the XML
         self.rules.append(ruleclass(modalities=self.modality_tree, grammar=self, **attrs_to_dict(rule_tag.attributes)))
         
     # Keep rules sorted by arity for ease of access
     self.unary_rules = []
     self.binary_rules = []
     for rule in self.rules:
         if rule.arity == 1:
             self.unary_rules.append(rule)
         elif rule.arity == 2:
             self.binary_rules.append(rule)
             
     # Index rules by internal name for ease of access
     self.rules_by_name = {}
     for rule in self.rules:
         if rule.internal_name in self.rules_by_name:
             # This shouldn't happen: each rule name should only be used once
             raise GrammarReadError, "instantiated two rules with the same "\
                 "internal name: %s. Either the XML has mistakenly "\
                 "instantiated the same thing twice, or the rule class has "\
                 "failed to give different varieties of the rule different "\
                 "names" % rule.internal_name
         self.rules_by_name[rule.internal_name] = rule
             
     # Optionally read in a lexrules element and expand the lexicon 
     #  using its entries
     self.lexical_rules = []
     lexrules_tag = get_single_element_by_tag_name(self.rules_dom, "lexrules", optional=True)
     if lexrules_tag is not None:
         for rule_tag in remove_unwanted_elements(lexrules_tag.childNodes):
             rulename = rule_tag.tagName
             if rulename not in self.formalism.rules:
                 raise GrammarReadError, "unknown lexical expansion "\
                     "rule '%s' (formalism defines: %s)" % \
                     (rulename, ", ".join(formalism.rules.keys()))
             ruleclass = self.formalism.rules[rulename]
             attrs = attrs_to_dict(rule_tag.attributes)
             # Make sure expanded category has a suffix to put on 
             #  POSs. If one isn't given, set a default.
             if "pos_suffix" in attrs:
                 pos_suffix = attrs["pos_suffix"]
                 del attrs["pos_suffix"]
             else:
                 pos_suffix = "_Rep"
             # Instantiate the rule, using any options given
             rule = ruleclass(modalities=self.modality_tree,
                                 grammar=self, 
                                 **attrs)
             rule.pos_suffix = pos_suffix
             # Can only use unary rules - check this one is
             if rule.arity != 1:
                 raise "can only use unary rules as lexical "\
                     "expansions. Tried to use %s, which has arity "\
                     "%d." % (rulename, rule.arity)
             self.lexical_rules.append(rule)
     # Use each lexical rule to expand the lexicon
     for rule in self.lexical_rules:
         for fam in sum(self.families.values(), []):
             for entry in fam.entries:
                 # Try apply the expansion rule to this entry
                 new_signs = rule.apply_rule([entry.sign])
                 if new_signs is not None and len(new_signs) > 0:
                     # Make a new POS for this expanded category
                     new_pos = "%s%s" % (fam.pos, rule.pos_suffix)
                     new_entries = [EntriesItem(self.formalism, "Expanded", new_sign) \
                                 for new_sign in new_signs]
                     new_family = Family(self.formalism, 
                                         new_pos, 
                                         new_pos,
                                         new_entries,
                                         chordfn=fam.chordfn,
                                         expanded=rule.internal_name)
                     self.families.setdefault(new_pos, []).append(new_family)
                     # Also create morph items for each of those 
                     #  that referenced the old unexpanded rules
                     for morph in [m for m in self.morphs if m.pos == fam.pos]:
                         self.morphs.append(
                                 MorphItem(
                                     self.formalism,
                                     copy.deepcopy(morph.words),
                                     new_pos,
                                     optional_minor=morph.optional_minor,
                                     chord_class=morph.chord_class))
     
     ###############
     # Index the morph items by word to make lookup easier
     self.morph_items = {}
     for morph in self.morphs:
         # If the pos is completely inactive in the lexicon, ignore this morph
         if not morph.pos in self.inactive_families:
             # Go through each of this morph's words
             for word in morph.words:
                 # Put a new MorphItem in the table for every entry
                 if word in self.morph_items:
                     # Already a list for this word: add to it
                     self.morph_items[word].append(morph)
                 else:
                     # First occurence of this word: add a new list
                     self.morph_items[word] = [morph]
     
     ###############
     # Read in an equivalence map if one is given for morph entries
     equiv_map_el = get_single_element_by_tag_name(self.morph_dom, "equivmap", optional=True)
     if equiv_map_el is not None:
         self.equiv_map = EquivalenceMap.from_dom(formalism, 
                                                  equiv_map_el, 
                                                  self.chord_classes, 
                                                  self.morphs)
     else:
         self.equiv_map = EquivalenceMap()
     
     ###########
     # Prepare a version of the family list for MIDI input
     self.midi_families = {}
     for pos,fams in self.families.items():
         new_fams = []
         for fam in fams:
             # Exclude any generated by lexical expansions, unless they're 
             #  tonic function
             if fam.expanded is not None and fam.chordfn != "T":
                 continue
             new_fams.append(fam)
         if new_fams:
             # Exclude any that are mapped onto another entry by an equivalence 
             #  mapping that changes the root
             if pos in self.equiv_map:
                 continue
             self.midi_families[pos] = new_fams
     
     ####### Debugging output
     logger.debug( "Read the following information from the grammar:")
     logger.debug( "Morphology:")
     logger.debug("\n".join(["%s: %s" % (word, ", ".join(["%s" % item.pos for item in items])) \
                      for word,items in self.morph_items.items()]))
     logger.debug("Lexicon:")
     logger.debug("\n".join([", ".join(["%s" % initem for initem in item]) \
                      for item in self.families.values()]))
     logger.debug("Rules:")
     logger.debug("\n".join(["  %s" % item for item in self.rules]))
     logger.debug("Lexical expansion rules:")
     logger.debug("\n".join(["  %s" % item for item in self.lexical_rules]))
     logger.debug("Modalities:")
     logger.debug("%s" % self.modality_tree)
     if len(self.literal_functions):
         logger.debug("Literal functions:")
         logger.debug("\n".join(["  %s: %s" % (name,val) for (name,val) in self.literal_functions.items()]))