# Morphological attributes copied from a terminal element into its feature
# structure when building the terminal's label.
_MORPH_KEYS = ('Person', 'Tense', 'Voice', 'Mood', 'Case', 'Number', 'Gender', 'Degree')


def _nonTerminalLabel(element):
    '''Return "Cat[_Rule][_ClType]" for element, omitting absent attributes.'''
    label = str(element.get('Cat'))
    for attribute in ('Rule', 'ClType'):
        value = element.get(attribute)
        if value is not None:
            label += '_' + str(value)
    return label


def _childLabel(child):
    '''Return the label used for one child in a production's right-hand side.'''
    if 'Unicode' not in child.keys():
        return _nonTerminalLabel(child)
    # Elements carrying a 'Unicode' attribute are treated as terminals:
    # their label is "{Cat_<features>}".
    features = FeatStruct()
    for key in child.keys():
        if key in _MORPH_KEYS:
            features = features.unify(FeatStruct('[' + str(key) + '=\'' + str(child.get(key)) + '\']'))
    return '{' + str(child.get('Cat')) + '_' + stringFeatures(features) + '}'


def addNonTerminal(nodes,node):
    '''Record the production rooted at a non-terminal XML element.

    The element's name is "Cat[_Rule][_ClType]". If whichNode() reports -1
    for that name, a new NonTerminal is appended to nodes first. The labels
    of the element's children are then passed to that entry's addPath().

    Arguments:
    nodes -- list of grammar nodes; modified in place.
    node -- XML element describing the non-terminal.
    '''
    name = _nonTerminalLabel(node)
    nodeNumber = whichNode(name,nodes)
    if nodeNumber == -1:
        nodes.append(NonTerminal(name))
        nodeNumber = len(nodes) - 1
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9, while iteration works on every version.
    children = [_childLabel(child) for child in node]
    nodes[nodeNumber].addPath(children,nodes)
def smartUnify(*featstructs):
    '''Unifies two or more feature structures based on what they have
    in common. For example, [person=1,number=2] and [case=3,number=2]
    will return [number=2], not [person=1,number=2,case=3] as
    featStruct.unify() would do. Return None if unable to unify.

    Called with no arguments, an empty FeatStruct is returned instead of
    raising IndexError.

    Arguments:
    *featstructs -- Any number of feature structures to unify.
    '''
    if not featstructs:
        return FeatStruct()

    # Feature names present in every structure.
    shared = set(featstructs[0])
    for struct in featstructs[1:]:
        shared.intersection_update(struct)

    # Unify each structure's value for every shared feature; a None result
    # from unify() is propagated as None.
    result = FeatStruct()
    for struct in featstructs:
        for feature in struct:
            if feature in shared:
                result = result.unify(FeatStruct('['+str(feature)+'='+str(struct[feature])+']'))
                if result is None:
                    return None
    return result
def addTerminal(nodes,node):
    '''Append a Terminal for an XML element unless one is already recorded.

    The element's morphological attributes are collected into a FeatStruct
    and the lookup name "{Cat_<features>}" is built from them. A new
    Terminal is appended only when whichNode() returns -1 for that name.

    Arguments:
    nodes -- list of grammar nodes; modified in place.
    node -- XML element describing the terminal.
    '''
    features = FeatStruct()
    for key in node.keys():
        if key in ('Person','Tense','Voice','Mood','Case','Number','Gender','Degree'):
            features = features.unify(FeatStruct('['+str(key)+'=\''+str(node.get(key))+'\']'))
    category = str(node.get('Cat'))
    name = '{' + category + '_' + stringFeatures(features) + '}'
    if whichNode(name,nodes) == -1:
        # Pass the category itself. Slicing it back out of name up to the
        # first '_' truncated any category that contains an underscore.
        nodes.append(Terminal(category,features))
# Example #4
# 0
def featStruct(gapUp,
               semUp,
               varUp,
               arriveFlag=False,
               destVpFlag=False,
               sourceVpFlag=False,
               busNameNpFlag=False,
               destNpFlag=False,
               departFlag=False,
               departVpFlag=False):
    """Unify an update structure with a query template chosen by the flags.

    A template [gap, sem=[query=[vp, np, wh]], var] is built from variables,
    then FeatStruct(gap=gapUp, sem=semUp, var=varUp) is unified with it.

    The vp part is:
      * arrive only, when arriveFlag is set and neither destVpFlag nor
        sourceVpFlag is;
      * depart + source, when departFlag and departVpFlag are both set;
      * depart + source + arrive + dest otherwise.
    The np part is the 'dest' form when destNpFlag is set and busNameNpFlag
    is not; otherwise it is the 'the' (bus name) form.

    Returns whatever unify() yields for the two structures.
    """
    if arriveFlag and not destVpFlag and not sourceVpFlag:
        arrive_time = FeatStruct(t_var=Variable('?t'),
                                 time_var=Variable('?time'))
        vp = FeatStruct(arrive=FeatStruct(a=Variable('?a'),
                                          f=Variable('?f'),
                                          t=arrive_time))
    else:
        # The depart/source sub-structures are the same in both remaining
        # templates.
        depart = FeatStruct(
            d=Variable('?d'),
            f=Variable('?fDep'),
            t=FeatStruct(t_var=Variable('?t_var_dep'),
                         time_var=Variable('?timeDepart')))
        source = FeatStruct(
            bus=Variable('?h'),
            sourceName=FeatStruct(f=Variable('?h'),
                                  name=Variable('?nameSource')))
        if departFlag and departVpFlag:
            vp = FeatStruct(depart=depart, source=source)
        else:
            arrive = FeatStruct(
                a=Variable('?a'),
                f=Variable('?fArr'),
                t=FeatStruct(t_var=Variable('?t_var_arr'),
                             time_var=Variable('?timeArrive')))
            dest_name = FeatStruct(
                f=Variable('?f'),
                name=FeatStruct(h=Variable('?hDest'),
                                name=Variable('?nameDest')))
            vp = FeatStruct(depart=depart,
                            source=source,
                            arrive=arrive,
                            dest=FeatStruct(destName=dest_name))

    if destNpFlag and not busNameNpFlag:
        place = FeatStruct(f=Variable('?f'),
                           name=FeatStruct(h=Variable('?h'),
                                           name=Variable('?name')))
        np = FeatStruct(dest=FeatStruct(bus=Variable('?f'), dest=place))
    else:
        bus_name = FeatStruct(h=Variable('?h_BusName'),
                              name=Variable('?busName'))
        np = FeatStruct(the=FeatStruct(bus=Variable('?b'), busname=bus_name))

    wh = FeatStruct(
        whType=FeatStruct(f=Variable('?f'), type=Variable('?type')))
    query = FeatStruct(vp=vp, np=np, wh=wh)

    template = FeatStruct(gap=Variable('?gap'),
                          sem=FeatStruct(query=query),
                          var=Variable('?a'))
    update = FeatStruct(gap=gapUp, sem=semUp, var=varUp)
    return update.unify(template)
        self.value = value
        self.features = features
        
# Flat list of grammar nodes collected from the treebank. A '------' string
# is appended after each sentence as a separator.
nodes = []

tree = ET.parse('65.xml')
root = tree.getroot()

for sentence in root.iter('Sentence'):
    # NOTE: this loop variable rebinds the module-level name `tree`.
    for tree in sentence.iter('Tree'):
        for node in tree.iter('Node'):
            if 'Unicode' in node.keys():
                # Elements with a 'Unicode' attribute become terminals that
                # carry their morphological attributes as features.
                features = FeatStruct()
                for key in node.keys():
                    if key in ('Person','Tense','Voice','Mood','Case','Number','Gender','Degree'):
                        features = features.unify(FeatStruct('['+str(key)+'=\''+str(node.get(key))+'\']'))
                nodes.append(Terminal(node.get('Cat'),node.get('Unicode'),features))
            else:
                # Non-terminal name is "Cat[_Rule][_ClType]".
                name = str(node.get('Cat')) + ('_'+ str(node.get('Rule')) if node.get('Rule') is not None else '') + ('_' + str(node.get('ClType')) if node.get('ClType') is not None else '')
                children = []
                # Iterate the element directly: Element.getchildren() was
                # removed in Python 3.9.
                for child in node:
                    children.append(child.get('Cat') + ('_'+ str(child.get('Rule')) if child.get('Rule') is not None else '') + ('_' + str(child.get('ClType')) if child.get('ClType') is not None else ''))
                nodes.append(NonTerminal(name,children))
    nodes.append('------')
            
def printCFG(nodes):
    s = ''
    for node in nodes:
        if isinstance(node,Terminal):
            s += node.name + ' --> {' + node.name + '_'
            for key in node.features.keys():
def analyze_template(s):
    '''Parse template text into two dictionaries of feature structures.

    Lines are stripped; blank lines and lines starting with ';' are skipped,
    and anything after a ';' on a line is dropped as a comment.

    '@name f1, f2, ... !' lines define a named feature structure. Each
    feature is either '@other' (unify with an earlier definition) or
    '<path> = value', where value may itself be an '@other' reference.

    '#word node:<path> = value, ... !' lines define, for one identifier, a
    feature structure keyed by node type.

    Arguments:
    s -- the template text.

    Returns a tuple (feature_list, feature_list2): the first maps '@' names
    to feature structures, the second maps '#' identifiers to theirs.

    Raises TypeError for a line that is not terminated by '!', for a
    left-hand side not wrapped in '<>', or for an unrecognized line, and
    KeyError when a '#' identifier is defined twice.
    '''
    feature_list = {}
    feature_list2 = {}
    for l in s.splitlines():
        l = l.strip()
        if l == '' or l[0] == ';':
            continue

        # Drop a trailing comment.
        index = l.find(';')
        if index != -1:
            l = l[:index]
            l = l.strip()

        if l[0] == '@':
            if l[-1] != '!':
                raise TypeError("Each line should be terminated with a '!'")
            l = l[1:-1]
            # we only split the name and the other part
            temp = l.split(None,1)
            name = temp[0]
            l = l[len(name):].strip()
            features = l.split(',')

            fs = FeatStruct()
            for f in features:
                f = f.strip()
                if not f:
                    # No features at all, or a stray comma: nothing to add.
                    continue
                index = f.find('=')
                # `in` replaces dict.has_key(), which Python 3 removed.
                if f[0] == '@' and f[1:] in feature_list:
                    fs = fs.unify(feature_list[f[1:]])    # unify() does not change in-place
                elif index != -1:
                    lhs = f[:index].strip()
                    rhs = f[index + 1:].strip()
                    if rhs[0] == '@':      # rhs can also be reference
                        rhs = feature_list[rhs[1:]]
                    if lhs[0] != '<' or lhs[-1] != '>':
                        raise TypeError('The left hand side of a feature structure must be wrapped with <>')
                    lhs = lhs[1:-1]
                    path = lhs.split()
                    fs = fs.unify(make_fs(path,rhs))
            feature_list[name] = fs
        elif l[0] == '#':
            if l[-1] != '!':
                raise TypeError('Invalid input line, must be terminated by "!"')
            l = l[1:-1]
            tokens = l.split(None,1)     # Split for once using space character
            word_pos = tokens[0].strip()

            features = tokens[1].split(',')
            new_fs = FeatStruct()
            for entry in features:
                tokens = entry.split(':',1)
                node_type = tokens[0].strip()
                tokens = tokens[1].split('=',1)
                lhs = tokens[0].strip()[1:-1]    # Remove <>
                rhs = tokens[1].strip()
                lhs = lhs.split()
                if node_type in new_fs:
                    new_fs[node_type] = new_fs[node_type].unify(make_fs(lhs,rhs))
                else:
                    new_fs[node_type] = make_fs(lhs,rhs)
            if word_pos not in feature_list2:
                feature_list2[word_pos] = new_fs
            else:
                raise KeyError('Duplicate defitinion detected: %s.' % (word_pos))
        else:
            raise TypeError('Cannot recognize line: %s.' % (l))
    return (feature_list,feature_list2)