コード例 #1
0
def addNonTerminal(nodes,node):
    """Record ``node`` as a non-terminal in ``nodes`` together with its child path.

    The non-terminal is looked up by name with ``whichNode``; when that
    returns -1 a new ``NonTerminal`` is appended to ``nodes``.  In both cases
    the list of child labels is handed to the entry's ``addPath``.

    Arguments:
    nodes -- list of grammar nodes collected so far; mutated in place.
    node -- XML element exposing ``get``, ``keys`` and iteration over its
            children.
    """
    name = _nonTerminalLabel(node)
    nodeNumber = whichNode(name, nodes)
    if nodeNumber == -1:
        nodes.append(NonTerminal(name))
        target = nodes[-1]
    else:
        target = nodes[nodeNumber]

    children = []
    # Iterate the element directly: Element.getchildren() was removed from
    # xml.etree.ElementTree in Python 3.9, while iteration works everywhere.
    for child in node:
        if 'Unicode' in child.keys():
            # Children carrying a 'Unicode' attribute are labelled as terminals.
            children.append(_terminalLabel(child))
        else:
            children.append(_nonTerminalLabel(child))
    target.addPath(children, nodes)


def _nonTerminalLabel(element):
    """Build the ``Cat[_Rule][_ClType]`` label of a non-terminal element."""
    label = str(element.get('Cat'))
    for attribute in ('Rule', 'ClType'):
        if element.get(attribute) is not None:
            label += '_' + str(element.get(attribute))
    return label


def _terminalLabel(element):
    """Build the ``{Cat_features}`` label of a terminal element."""
    features = FeatStruct()
    for key in element.keys():
        if key in ('Person', 'Tense', 'Voice', 'Mood', 'Case', 'Number', 'Gender', 'Degree'):
            features = features.unify(FeatStruct('[' + str(key) + '=\'' + str(element.get(key)) + '\']'))
    return '{' + str(element.get('Cat')) + '_' + stringFeatures(features) + '}'
コード例 #2
0
def get_instance_featstructs(root):
    """Return one FeatStruct per instance, with its event embedded.

    For every instance returned by ``get_instances(root)`` the first event
    whose ``eid`` equals the instance's ``eventID`` is converted to a
    FeatStruct (``class``, ``stem`` and ``text``) and stored on the instance
    under the ``event`` attribute.  When building that structure raises
    ``KeyError`` the lookup is repeated with ``class`` and ``text`` only.
    ``StopIteration`` propagates when no event matches.

    The returned list holds a FeatStruct of every instance attribute except
    ``eventID``.
    """
    instances = get_instances(root)
    events = get_events(root)

    def matching_event_struct(instance, attribute_names):
        # First event whose id matches, rendered with the requested attributes.
        return next(
            FeatStruct(dict({name: event.attrib[name] for name in attribute_names},
                            text=event.text))
            for event in events
            if event.get('eid') == instance.get('eventID'))

    for instance in instances:
        try:
            instance.set('event', matching_event_struct(instance, ['class', 'stem']))
        except KeyError:
            instance.set('event', matching_event_struct(instance, ['class']))

    result = []
    for instance in instances:
        kept = set(instance.keys()) - {'eventID'}
        result.append(FeatStruct({name: instance.attrib[name] for name in kept}))
    return result
コード例 #3
0
def smartUnify(*featstructs):
    '''Unify only the features that every given structure has in common.

    The feature names shared by all structures are collected first; then,
    for each structure in turn, each shared feature is unified into the
    result.  For example, [person=1,number=2] and [case=3,number=2] give
    [number=2] rather than the full merge that FeatStruct.unify() would
    produce.  Returns None as soon as a unification step fails.

    Arguments:
    *featstructs -- The feature structures to unify (at least one is
                    required; an empty call raises IndexError).
    '''
    # One list of feature names per input structure.
    name_lists = [list(struct) for struct in featstructs]
    # Names present in every structure.
    shared = set(name_lists[0]).intersection(*name_lists[1:])

    result = FeatStruct()
    for struct in featstructs:
        for name in struct:
            if name not in shared:
                continue
            result = result.unify(FeatStruct('[' + str(name) + '=' + str(struct[name]) + ']'))
            if result is None:
                return None
    return result
コード例 #4
0
ファイル: feature.py プロジェクト: sfu-natlang/xtag-nltk
def debug_test_contain():
    """Print the result of test_contain() for two small leaf structures.

    ``fs100`` holds one ``__or_`` entry and ``fs101`` holds that entry plus
    one more.
    """
    fs100 = FeatStruct()
    fs100['__or_wzq'] = 'wzq'
    fs101 = FeatStruct()
    fs101['__or_123'] = '123'
    fs101['__or_wzq'] = 'wzq'
    # Single-argument print() gives the same output on Python 2 and Python 3.
    print(test_contain(fs100, fs101))
コード例 #5
0
def addTerminal(nodes,node):
    """Append a ``Terminal`` for ``node`` to ``nodes`` unless one already exists.

    A feature structure is built from the node's morphological attributes,
    and the lookup name ``{Cat_features}`` is checked with ``whichNode``.
    When that returns -1 a new ``Terminal`` is appended; its first argument
    is the part of the name between the opening brace and the first
    underscore.

    Arguments:
    nodes -- list of grammar nodes collected so far; mutated in place.
    node -- XML element exposing ``get`` and ``keys``.
    """
    morph_keys = ['Person','Tense','Voice','Mood','Case','Number','Gender','Degree']
    features = FeatStruct()
    for key in node.keys():
        if key not in morph_keys:
            continue
        spec = ''.join(['[', str(key), '=\'', str(node.get(key)), '\']'])
        features = features.unify(FeatStruct(spec))

    name = ''.join(['{', str(node.get('Cat')), '_', stringFeatures(features), '}'])
    if whichNode(name,nodes) != -1:
        return
    category = name[1:name.find('_')]
    nodes.append(Terminal(category,features))
コード例 #6
0
def match_feature(feature, regexp, operation=0):
    """
    match_feature(feature,regexp,operation=0) -> FeatStruct

    Filter a feature structure by applying a regular expression to its
    feature NAMES (keys).  The structure is expected to follow the XTAG
    convention of a '__value__' entry at the last level of indexing.

    feature: The feature structure to filter.

    regexp: A pattern accepted by re.search().

    operation: 0 for positive filtering (keep entries whose name matches),
    1 for negative filtering (keep entries whose name does not match).
    Non-leaf entries that are not decided by their own name are filtered
    recursively.

    Returns a new FeatStruct, or None when nothing was retained.
    """
    new_feature = FeatStruct()
    count = 0
    for name in feature.keys():
        val = feature[name]
        matched = re.search(regexp, name) is not None
        # 'in' replaces dict.has_key(), which no longer exists in Python 3.
        if '__value__' in val:
            # Leaf entry: keep or drop purely on its own name.
            if (operation == 0 and matched) or (operation == 1 and not matched):
                new_feature[name] = val
                count += 1
        elif operation == 0 and matched:
            # Positive match on an inner node keeps the whole subtree.
            new_feature[name] = val
            count += 1
        elif operation == 1 and not matched:
            # Negative filtering keeps the node itself and filters below it;
            # an empty structure stands in when nothing below survives.
            ret = match_feature(val, regexp, operation)
            new_feature[name] = ret if ret is not None else FeatStruct()
            count += 1
        elif operation == 1 and matched:
            # Negative filtering drops a matching inner node entirely.
            pass
        else:
            # Positive filtering without a match here: look deeper and keep
            # the node only if something below it matched.
            ret = match_feature(val, regexp, operation)
            if ret is not None:
                new_feature[name] = ret
                count += 1

    if count == 0:
        return None
    return new_feature
コード例 #7
0
 def topic_features(self, article):
     """Return ``{word: True}`` for every word in the article's word occurrences.

     The occurrences come from ``self.get_word_occurence(article)``.
     """
     word_occurence = self.get_word_occurence(article)
     # NOTE(review): the two frozen structures built here are never used by
     # the return value; kept so that construction errors still surface.
     occurrence_struct = FeatStruct(word_occurence)
     occurrence_struct.freeze()
     wrapper = FeatStruct(word_occurence=occurrence_struct)
     wrapper.freeze()
     return {word: True for word in word_occurence}
コード例 #8
0
def make_fs(lhs,rhs):
    """Build a nested feature structure from a path and a value.

    Each element of ``lhs`` becomes one nesting level, and the innermost
    level wraps ``rhs`` in a structure under the '__value__' key, e.g.
    lhs = ['a','b'] and rhs = 'wzq' give [a = [b = [__value__ = 'wzq']]].
    """
    wrapper = FeatStruct()

    if len(lhs) != 1:
        # More path left: nest the structure built from the remaining names.
        wrapper[lhs[0]] = make_fs(lhs[1:],rhs)
        return wrapper

    leaf = FeatStruct()
    leaf['__value__'] = rhs
    wrapper[lhs[0]] = leaf
    return wrapper
コード例 #9
0
ファイル: utils.py プロジェクト: ppke-nlpg/AnaGramma-Parser
def nested_frozen_fs(dictionary):
    """Return a frozen FeatStruct equivalent of ``dictionary``.

    A FeatStruct argument is frozen in place and returned as is.  Any other
    mapping is copied into a new FeatStruct, converting ``set`` values to
    ``frozenset`` and ``dict`` values recursively, and the copy is frozen.
    """
    if isinstance(dictionary, FeatStruct):
        dictionary.freeze()
        return dictionary

    frozen = FeatStruct()
    for key, value in dictionary.items():
        if isinstance(value, set):
            value = frozenset(value)
        elif isinstance(value, dict):
            value = nested_frozen_fs(value)
        frozen[key] = value
    frozen.freeze()
    return frozen
コード例 #10
0
ファイル: feature.py プロジェクト: sfu-natlang/xtag-nltk
def make_rhs_using_or(rhs):
    """
    :param rhs: The right hand side string, possibly with '/'-separated
                alternatives
    :type rhs: str

    :return: A feature structure with one entry per alternative
    :rtype: FeatStruct

    Every alternative in ``rhs`` is stored as its own entry; the key is
    produced by ``make_leaf_str`` from the alternative and the value is the
    alternative itself.

    For example,

    rhs = a/b/c ->  [ __or_a = a ]
                    [ __or_b = b ]
                    [ __or_c = c ]
    """
    new_fs = FeatStruct()
    # split() yields a one-element list when there is no '/', so no separate
    # single-value case is needed.
    for alternative in rhs.split('/'):
        key = make_leaf_str(alternative)
        new_fs[key] = alternative
    return new_fs
コード例 #11
0
def fourth_pass(xtag_trees):
    """
    fourth_pass() -> list

    Build the feature dictionary of every entry from its feature list.

    Only feature entries whose right hand side contains no ':' are handled
    here; each is wrapped in a FeatStruct under '__value__' and stored via
    add_two_feature().  The resulting dictionary is written to index 4 of
    the entry, and the same list is returned.
    """
    for xtag_entry in xtag_trees:
        features = {}
        for feature_entry in xtag_entry[1]:
            lhs, rhs = feature_entry[0], feature_entry[1]
            colon = lhs.find(':')
            if rhs.find(':') != -1:
                continue

            node_id = lhs[:colon]
            space = lhs.find(' ')

            leaf = FeatStruct()
            leaf["__value__"] = rhs

            if space == -1:
                # Single feature name after the separator.
                add_two_feature(features, node_id, leaf, lhs[colon + 2:-1])
            else:
                # Two space-separated feature names after the separator.
                add_two_feature(features, node_id, leaf,
                                lhs[colon + 2:space], lhs[space + 1:-1])

        xtag_entry[4] = features

    return xtag_trees
コード例 #12
0
def _naive_unify(fstruct1:FeatStruct, fstruct2:FeatStruct):
    """Merge ``fstruct2`` into a shallow copy of ``fstruct1``.

    Two mappings: features present in both are merged recursively, features
    only in ``fstruct2`` are copied over.  Features are visited in sorted
    order for deterministic behaviour.

    Two sequences: the values are concatenated (not unified position by
    position), ``Variable`` items are dropped, and a tuple is returned.

    Any other combination returns None.
    """
    newfs = copy.copy(fstruct1)

    if _is_mapping(fstruct1) and _is_mapping(fstruct2):
        for fname, fval2 in sorted(fstruct2.items()):
            if fname not in fstruct1:
                newfs[fname] = fval2
            else:
                newfs[fname] = _naive_unify(fstruct1[fname], fval2)
        return newfs

    if _is_sequence(fstruct1) and _is_sequence(fstruct2):
        newfs += fstruct2
        return tuple(item for item in newfs if not isinstance(item, Variable))

    return None
コード例 #13
0
ファイル: feature.py プロジェクト: sfu-natlang/xtag-nltk
def remove_or_tag(feature):
    """
    :param feature: The feature structure to strip the '__or_' wrappers from
    :type feature: FeatStruct

    :return: A new feature structure with the '__or_' level removed
    :rtype: FeatStruct

    Every entry that test_leaf() recognises as a leaf is replaced by its
    values joined with '/'; every other entry is processed recursively.

    e.g. for fs = [apple = [__or_a = 'a']]
                  [        [__or_b = 'b']]
                  [        [__or_c = 'c']]

    remove_or_tag(fs) will return:

      fs_return = [apple = 'a/b/c']
    """
    new_feature = FeatStruct()
    for key in feature.keys():
        entry = feature[key]
        # Materialise the keys: Python 3 key views cannot be indexed.
        entry_keys = list(entry.keys())
        if test_leaf(entry):
            str_or_removed = entry[entry_keys[0]]
            for leaf_key in entry_keys[1:]:
                str_or_removed += '/' + entry[leaf_key]
            new_feature[key] = str_or_removed
        else:
            new_feature[key] = remove_or_tag(entry)
    return new_feature
コード例 #14
0
def find_probability(list1,list2,targword):
    """Score each group in ``list1`` against ``list2[0][0]`` and print results.

    For every group, the number of positions at which an item equals the
    item at the same position in ``list2[0][0]`` is counted, and a ratio is
    appended to the probability list.  The index of the highest ratio
    selects the sense file ``<targword>/Senses00<index>.txt`` (UTF-16),
    whose content is printed.  Finally a LinearSVC classifier is trained on
    ``list1`` and its accuracy on ``list2`` is printed.

    Returns None; all results are printed.
    """
    c=0
    prob_list=[]
    sense_word_list=[]
    for i in range(len(list1)):
        for j in range(len(list1[i])):
            for k in range(len(list1[i][j])):
                if(list1[i][j][k]==list2[0][0][k]):
                    c+=1
        # NOTE(review): ``c`` is not reset between groups, so later groups
        # include the matches of earlier ones -- confirm this is intended.
        prob=c/(5*len(list1)*len(list1[i]))
        prob_list.append(prob)

    max_prob=max(prob_list)
    sensetag=prob_list.index(max_prob)
    with codecs.open(targword + '/Senses00' + str(sensetag) + '.txt', encoding='utf-16') as f:
        sense_word_list.append(f.read())


    print("\n")
    print(sense_word_list)


    try:
        LinearSVC_classifier = SklearnClassifier(LinearSVC())
        LinearSVC_classifier.train(list1)
        print("LinearSVC_classifier accuracy percent:", (nltk.classify.accuracy(LinearSVC_classifier, list2)) * 100)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        # NOTE(review): ``value`` is not defined in this block; this
        # fallback raises NameError unless it is provided elsewhere.
        print("Classifier accuracy percent :", "{0:.3f}".format(value((FeatStruct('[1,2,3]')))))
コード例 #15
0
def remove_value_tag(feature):
    """Return a copy of ``feature`` with every '__value__' wrapper removed.

    An entry that contains a '__value__' key is replaced by that value;
    any other entry is processed recursively.
    """
    new_feature = FeatStruct()
    for key in feature.keys():
        entry = feature[key]
        # 'in' replaces dict.has_key(), which no longer exists in Python 3.
        if '__value__' in entry:
            new_feature[key] = entry['__value__']
        else:
            new_feature[key] = remove_value_tag(entry)
    return new_feature
コード例 #16
0
ファイル: feature.py プロジェクト: sfu-natlang/xtag-nltk
def test_contain(fs1, fs2):
    """
    Compare the key sets of two leaf feature structures.

    :param fs1: The first feature structure to test
    :type fs1: FeatStruct
    :param fs2: The second feature structure to test
    :type fs2: FeatStruct
    :return: 0 if both have exactly the same keys
             1 if the keys of fs1 are a proper subset of those of fs2
            -1 if the keys of fs2 are a proper subset of those of fs1
    FeatStruct holding the shared entries (values taken from fs1) if the
             key sets only partially overlap
          None if no key is shared
    :rtype: integer/FeatStruct/None
    :raises ValueError: if test_leaf() reports False for either argument

    Only the keys are compared; the values are not inspected.
    """
    if test_leaf(fs1) == False or test_leaf(fs2) == False:
        raise ValueError('Two arguments must be leaf nodes.')

    keys_1 = fs1.keys()
    keys_2 = fs2.keys()
    shared = [key for key in keys_1 if key in keys_2]

    if not shared:
        return None

    fs1_fully_shared = len(keys_1) == len(shared)
    fs2_fully_shared = len(keys_2) == len(shared)

    if fs1_fully_shared and fs2_fully_shared:
        return 0  # identical key sets
    if fs1_fully_shared:
        return 1  # every key of fs1 also appears in fs2
    if fs2_fully_shared:
        return -1  # every key of fs2 also appears in fs1

    # Partial overlap: hand back just the shared entries.
    overlap = FeatStruct()
    for key in shared:
        overlap[key] = fs1[key]
    return overlap
コード例 #17
0
def add_two_feature(features, l_id, rhs, l_feature1, l_feature2=None):
    """Store ``rhs`` under ``features[l_id][l_feature1]`` or one level deeper.

    Missing intermediate levels are created as empty FeatStructs.

    features: mapping from node id to feature structure; mutated in place.
    l_id: key of the node whose features are updated.
    rhs: the value to store.
    l_feature1: first-level feature name.
    l_feature2: optional second-level feature name; when given, ``rhs`` is
    stored at ``features[l_id][l_feature1][l_feature2]``.
    """
    # 'in' replaces dict.has_key(), which no longer exists in Python 3.
    if l_id not in features:
        features[l_id] = FeatStruct()
    node_features = features[l_id]

    if l_feature2 is None:
        node_features[l_feature1] = rhs
        return

    if l_feature1 not in node_features:
        node_features[l_feature1] = FeatStruct()
    node_features[l_feature1][l_feature2] = rhs
    return
コード例 #18
0
def fifth_pass(xtag_trees):
    """
    fifth_pass() -> list

    Continue building the feature dictionaries produced by fourth_pass().

    Only feature entries whose right hand side contains a ':' are handled
    here.  The referenced entry ``features[r_id][r_feature]`` is created
    with an empty '__value__' when missing, and the very same object is then
    stored under the left hand side via add_two_feature(), so both sides
    share one structure.  The input list is returned.
    """
    for xtag_entry in xtag_trees:
        features = xtag_entry[4]
        for feature_entry in xtag_entry[1]:
            lhs = feature_entry[0]
            rhs = feature_entry[1]
            l_separator = lhs.find(':')
            r_separator = rhs.find(':')

            if r_separator == -1:
                continue

            l_id = lhs[:l_separator]
            r_id = rhs[:r_separator]
            r_feature = rhs[r_separator + 2:-1]
            # Single-argument print() behaves the same on Python 2 and 3.
            print(r_feature)
            l_space = lhs.find(' ')

            # Make sure features[r_id][r_feature] exists.  'in' replaces
            # dict.has_key(), which no longer exists in Python 3.
            if r_id not in features:
                features[r_id] = FeatStruct()
            if r_feature not in features[r_id]:
                features[r_id][r_feature] = FeatStruct(__value__='')

            if l_space == -1:
                l_feature = lhs[l_separator + 2:-1]
                add_two_feature(features, l_id, features[r_id][r_feature],
                                l_feature)
            else:
                l_feature1 = lhs[l_separator + 2:l_space]
                l_feature2 = lhs[l_space + 1:-1]
                add_two_feature(features, l_id, features[r_id][r_feature],
                                l_feature1, l_feature2)

    return xtag_trees
コード例 #19
0
def create_fact(
    pred: FeatStruct, arg_0: Union[List[FeatStruct], FeatStruct], arg_1: Union[List[FeatStruct], FeatStruct]
) -> FeatStruct:
    """
    Construct the feature structure that represents a fact.

    Each argument may be a single structure or a list of structures; a
    single structure is treated as a one-element list before being passed
    to ``format_list``.
    ex: know(Gael, [Bas, Justine]) returns a feature structure of the type:
        [pred=[sem=know],
         arg0 = [head=Gael, tail=na],
         arg1 = [head=Bas, tail=[head=Justine, tail=na]]]
    """
    if not isinstance(arg_0, list):
        arg_0 = [arg_0]
    if not isinstance(arg_1, list):
        arg_1 = [arg_1]
    first = format_list(arg_0)
    second = format_list(arg_1)
    return FeatStruct(arg0=first, arg1=second, pred=pred)
コード例 #20
0
ファイル: feature.py プロジェクト: sfu-natlang/xtag-nltk
def parse_feature_in_catalog(s):
    """
    :param s: The string representing the start feature in the catalog file
    :type s: str
    :return: A feature structure which is the start feature
    :rtype: FeatStruct

    Parse a start-feature string of the following form into a feature
    structure:

    <mode> = ind/imp <comp> = nil <wh> = <invlink>  <punct term> = per/qmark/excl <punct struct> = nil

    Each ``<lhs> = rhs`` pair becomes one entry; the words inside the angle
    brackets of the left hand side form the path passed to add_new_fs().
    A right hand side may itself be written as ``<name>``.  This parser is
    specific to this string format.
    """
    # token_list is a list of (lhs, rhs) tuples,
    # e.g. [('mode', 'ind/imp'), ('comp', 'nil')]
    token_list = []
    while True:
        equal_sign = s.find('=')
        if equal_sign == -1:
            break
        # The text between '=' and the next '<' is the RHS.  If it is empty
        # the RHS itself is written as "<name>", so the '<' just found opens
        # the RHS and the one after it opens the next LHS.
        angle_bracket = s.find('<', equal_sign)
        if angle_bracket != -1 and s[equal_sign + 1:angle_bracket].strip() == '':
            angle_bracket = s.find('<', angle_bracket + 1)

        if angle_bracket == -1:
            rhs = s[equal_sign + 1:].strip()
        else:
            rhs = s[equal_sign + 1:angle_bracket].strip()
        lhs = s[:equal_sign].strip()[1:-1]
        token_list.append((lhs, rhs))

        if angle_bracket == -1:
            # Last pair consumed.  Stopping here also avoids an endless loop
            # for input ending in '=', where slicing with -1 would keep
            # yielding '='.
            break
        s = s[angle_bracket:]

    new_fs = FeatStruct()
    for lhs, rhs in token_list:
        add_new_fs(new_fs, lhs.split(), rhs)

    return new_fs
コード例 #21
0
ファイル: feature.py プロジェクト: sfu-natlang/xtag-nltk
def add_new_fs(fs, lhs, rhs, ref=0):
    """
    :param fs: The feature structure to add to; mutated in place
    :type fs: FeatStruct
    :param lhs: The path of the new node
    :type lhs: list(str)
    :param rhs: The value of the node
    :type rhs: str / Any object
    :param ref: 0 to build the value from the string ``rhs`` with
                make_rhs_using_or(), 1 to attach ``rhs`` itself unchanged
    :type ref: 0 / 1
    :raises ValueError: if ``ref`` is neither 0 nor 1

    Add the node described by ``lhs`` and ``rhs`` to the existing structure
    ``fs``.  Path components that already exist in ``fs`` are reused rather
    than replaced, so sibling entries are preserved; missing components are
    created as empty feature structures.

    For example,

    lhs = ['a','b']
    rhs = 'wzq'
    ->
    fs gains [a = [b = [__or_wzq = 'wzq']]]
    """
    if len(lhs) == 1:
        # ref == 0: rhs is a string that is turned into an '__or_' structure.
        # ref == 1: rhs is attached as is, i.e. only a reference is stored.
        if ref == 0:
            fs[lhs[0]] = make_rhs_using_or(rhs)
        elif ref == 1:
            fs[lhs[0]] = rhs
        else:
            raise ValueError('Undefined ref value %d' % (ref))
    else:
        # 'in' replaces dict.has_key(), which no longer exists in Python 3.
        if lhs[0] not in fs:
            fs[lhs[0]] = FeatStruct()
        add_new_fs(fs[lhs[0]], lhs[1:], rhs, ref)
    return
コード例 #22
0
ファイル: feature.py プロジェクト: sfu-natlang/xtag-nltk
def make_fs(lhs, rhs, ref=0):
    """
    :param lhs: The path on the left hand side
    :type lhs: list(str)
    :param rhs: The string on the right hand side / Any object
    :type rhs: str / object
    :param ref: 0 to convert the string ``rhs`` with make_rhs_using_or(),
                1 to store ``rhs`` untouched
    :type ref: 0 / 1
    :return: A newly constructed feature structure
    :rtype: FeatStruct
    :raises ValueError: if ``ref`` is neither 0 nor 1

    Build a feature structure that contains exactly the path given in
    ``lhs`` with the value placed at its end.

    lhs = ['a','b','c','d']
    rhs = 'wzq'
    ->FeatStruct = [a = [b = [c = [d = [__or_wzq = 'wzq']]]]]
    """
    new_fs = FeatStruct()

    if len(lhs) != 1:
        # More path components remain: nest the structure built from them.
        new_fs[lhs[0]] = make_fs(lhs[1:], rhs, ref)
        return new_fs

    if ref == 0:
        # Not a reference: rhs is a string that still has to be processed.
        rhs = make_rhs_using_or(rhs)
    elif ref != 1:
        raise ValueError('Undefined ref value %d' % (ref))

    new_fs[lhs[0]] = rhs
    return new_fs
コード例 #23
0
def debug():
    """Build a small nested '__value__' structure and print filtered views.

    Prints the structure itself, an empty line, and then the result of
    negative filtering with the pattern 'i' after the '__value__' wrappers
    have been removed.
    """
    e = FeatStruct()
    for name, text in (('first', 'OKWANGZiqi'),
                       ('second', 'WANGYunpeng'),
                       ('third', "WWA!!!"),
                       ('fourth', 'WZQ(*&YTG')):
        leaf = FeatStruct()
        leaf['__value__'] = text
        e[name] = leaf
    f = FeatStruct()
    f['nested'] = e
    g = FeatStruct()
    g['__value__'] = "WAAAAAAAAH!"
    f['single'] = g
    # Single-argument print() gives the same output on Python 2 and Python 3.
    print(f)
    print('')
    print(remove_value_tag(match_feature(f, 'i', 1)))
コード例 #24
0
def get_timex_featstructs(root):
    """Return a FeatStruct of the attributes of each timex under ``root``.

    The elements come from ``get_timexs(root)``.
    """
    structs = []
    for timex in get_timexs(root):
        structs.append(FeatStruct(timex.attrib))
    return structs
コード例 #25
0
ファイル: cfg.py プロジェクト: DrDub/icsisumm
 def __hash__(self):
     """Freeze this object, then return the hash computed by FeatStruct."""
     # NOTE(review): presumably FeatStruct only hashes frozen structures,
     # hence the freeze() before delegating -- confirm against NLTK.
     self.freeze()
     return FeatStruct.__hash__(self)
コード例 #26
0
ファイル: feature.py プロジェクト: sfu-natlang/xtag-nltk
    path_1 = get_all_path(fs1)
    for i in path_1:
        item_2 = get_element_by_path(fs2, i)
        if item_2 == None:
            item_1 = get_element_by_path(fs1, i)
            add_new_fs(new_fs, i, item_1, 1)

    if tree1 != None:
        correct_other_nodes(correction_list, tree1)
    if tree2 != None:
        correct_other_nodes(correction_list, tree2)

    return new_fs


# Module-level sample structures.  Resulting shape:
#   fs1 = [apple = fs2, orange = [more = fs3]]
# where fs2 and fs3 hold '__or_' leaf entries.
fs1 = FeatStruct()
fs2 = FeatStruct()
fs3 = FeatStruct()
fs4 = FeatStruct()
# fs3 is attached before it is filled; the entries added to it below are
# visible through fs4['more'] because the same object is referenced.
fs4['more'] = fs3
fs2['__or_a'] = 'a'
fs2['__or_wzq'] = 'wzq'
fs2['__or_qwe'] = 'qwe'
fs1['apple'] = fs2
fs1['orange'] = fs4
fs3['__or_zxcv'] = 'zxcv'
fs3['__or_4567'] = '4567'
# Sample start feature parsed from a catalog-style string.
debug_start_feature = parse_feature_in_catalog(
    '<mode> = ind/imp <comp> = nil <wh> = <invlink>  <punct term> = per/qmark/excl <punct struct> = nil'
)
# An empty feature structure.
empty_feature = FeatStruct()
コード例 #27
0
 def __hash__(self):
     """Freeze this object, then return the hash computed by FeatStruct."""
     # NOTE(review): presumably FeatStruct only hashes frozen structures,
     # hence the freeze() before delegating -- confirm against NLTK.
     self.freeze()
     return FeatStruct.__hash__(self)
コード例 #28
0
class Terminal:
    """Plain record for a terminal node: a name, a value and its features."""

    def __init__(self,name,value,features):
        # Store the three constructor arguments unchanged.
        self.name, self.value, self.features = name, value, features
        
# Flat list of grammar nodes collected from the treebank file; a '------'
# marker is appended after every sentence.
nodes = []

tree = ET.parse('65.xml')
root = tree.getroot()

for sentence in root.iter('Sentence'):
    for tree in sentence.iter('Tree'):
        for node in tree.iter('Node'):
            if 'Unicode' in node.keys():
                # Nodes with a 'Unicode' attribute are terminals: gather
                # their morphological attributes into a feature structure.
                features = FeatStruct()
                for key in node.keys():
                    if key in ['Person','Tense','Voice','Mood','Case','Number','Gender','Degree']:
                        features = features.unify(FeatStruct('['+str(key)+'=\''+str(node.get(key))+'\']'))
                nodes.append(Terminal(node.get('Cat'),node.get('Unicode'),features))
            else:
                name = str(node.get('Cat')) + ('_'+ str(node.get('Rule')) if node.get('Rule') is not None else '') + ('_' + str(node.get('ClType')) if node.get('ClType') is not None else '')
                children = []
                # Iterate the element directly: Element.getchildren() was
                # removed from xml.etree.ElementTree in Python 3.9.
                for child in node:
                    children.append(child.get('Cat') + ('_'+ str(child.get('Rule')) if child.get('Rule') is not None else '') + ('_' + str(child.get('ClType')) if child.get('ClType') is not None else ''))
                nodes.append(NonTerminal(name,children))
    nodes.append('------')
            
def printCFG(nodes):
    s = ''
    for node in nodes:
コード例 #29
0
ファイル: feature.py プロジェクト: sfu-natlang/xtag-nltk
def special_unify(fs1, fs2, tree1=None, tree2=None):
    """
    :param fs1: One of the feature structures to unify
    :type fs1: FeatStruct
    :param fs2: The other feature structure
    :type fs2: FeatStruct
    :param tree1: A tree whose inter-node references should be restored
    :type tree1: TAGTree
    :param tree2: Another tree, optional.
    :type tree2: TAGTree
    :return: The unified feature structure, or None on a conflict

    Unify like the normal unify(), with these differences:

    1. Disjunction is considered, e.g. [x = a/b/c] and [x = b/c/d] yield
    [x = b/c]; [x = a/b/c] and [x = a/b] yield [x = a/b].
    2. The result is a new feature structure, but its leaves are references
    to the leaf nodes of fs1 and fs2 rather than copies.
    3. When both the path and the value are the same, a new node is created
    and recorded in a correction list, which is then handed to
    correct_other_nodes() for each tree that was supplied.
    """
    new_fs = FeatStruct()
    correction_list = []

    # First pass: every path of fs2, merged with fs1 where the path overlaps.
    for path in get_all_path(fs2):
        item_1 = get_element_by_path(fs1, path)
        item_2 = get_element_by_path(fs2, path)

        if item_1 is None:
            # Path only in fs2: reference its leaf (ref == 1, no copy).
            add_new_fs(new_fs, path, item_2, 1)
            continue

        tc = test_contain(item_1, item_2)
        if tc == 1:
            # item_1 is the subset; the smaller one is always used.
            add_new_fs(new_fs, path, item_1, 1)
        elif tc == -1:
            add_new_fs(new_fs, path, item_2, 1)
        elif tc == 0:
            # Same entries on both sides: use one shared new node, reusing a
            # previously created one if the correction list already has it.
            new_entry = search_correction(correction_list, item_1)
            if new_entry is None:
                new_entry = copy.deepcopy(item_1)
                # (new node, old node in fs1, old node in fs2); used later
                # to redirect references inside the trees.
                correction_list.append((new_entry, item_1, item_2))
            add_new_fs(new_fs, path, new_entry, 1)
        elif tc is None:
            return None  # Conflict
        else:
            # Partial intersection: tc is a brand-new FeatStruct holding only
            # the shared part, so nothing needs correcting.
            add_new_fs(new_fs, path, tc, 1)

    # Second pass: overlapping paths are done, so only the paths that exist
    # in fs1 but not in fs2 remain to be added.
    for path in get_all_path(fs1):
        if get_element_by_path(fs2, path) is None:
            add_new_fs(new_fs, path, get_element_by_path(fs1, path), 1)

    for tree in (tree1, tree2):
        if tree is not None:
            correct_other_nodes(correction_list, tree)

    return new_fs
コード例 #30
0
def featStruct(gapUp,
               semUp,
               varUp,
               arriveFlag=False,
               destVpFlag=False,
               sourceVpFlag=False,
               busNameNpFlag=False,
               destNpFlag=False,
               departFlag=False,
               departVpFlag=False):
    """Unify the supplied gap/sem/var values with a variable-only template.

    The flags select which shape the ``vp`` and ``np`` parts of the template
    take; every leaf of the template is an NLTK ``Variable``.  The template is
    then unified with ``FeatStruct(gap=gapUp, sem=semUp, var=varUp)``.

    Returns:
        The value returned by ``FeatStruct.unify`` for that pair.
    """

    def depart_template():
        # Departure event with its nested time description.
        depart_time = FeatStruct(t_var=Variable('?t_var_dep'),
                                 time_var=Variable('?timeDepart'))
        return FeatStruct(d=Variable('?d'), f=Variable('?fDep'), t=depart_time)

    def source_template():
        # Source location; 'bus' and the inner 'f' share the variable ?h.
        source_name = FeatStruct(f=Variable('?h'),
                                 name=Variable('?nameSource'))
        return FeatStruct(bus=Variable('?h'), sourceName=source_name)

    # --- verb-phrase template -------------------------------------------
    if arriveFlag and not destVpFlag and not sourceVpFlag:
        arrive_time = FeatStruct(t_var=Variable('?t'),
                                 time_var=Variable('?time'))
        arrive_part = FeatStruct(a=Variable('?a'),
                                 f=Variable('?f'),
                                 t=arrive_time)
        vp_template = FeatStruct(arrive=arrive_part)
    elif departFlag and departVpFlag:
        vp_template = FeatStruct(depart=depart_template(),
                                 source=source_template())
    else:
        arrive_time = FeatStruct(t_var=Variable('?t_var_arr'),
                                 time_var=Variable('?timeArrive'))
        arrive_part = FeatStruct(a=Variable('?a'),
                                 f=Variable('?fArr'),
                                 t=arrive_time)
        dest_name = FeatStruct(f=Variable('?f'),
                               name=FeatStruct(h=Variable('?hDest'),
                                               name=Variable('?nameDest')))
        vp_template = FeatStruct(depart=depart_template(),
                                 source=source_template(),
                                 arrive=arrive_part,
                                 dest=FeatStruct(destName=dest_name))

    # --- noun-phrase template -------------------------------------------
    if destNpFlag and not busNameNpFlag:
        inner_dest = FeatStruct(f=Variable('?f'),
                                name=FeatStruct(h=Variable('?h'),
                                                name=Variable('?name')))
        np_template = FeatStruct(
            dest=FeatStruct(bus=Variable('?f'), dest=inner_dest))
    else:
        bus_name = FeatStruct(h=Variable('?h_BusName'),
                              name=Variable('?busName'))
        np_template = FeatStruct(
            the=FeatStruct(bus=Variable('?b'), busname=bus_name))

    # --- assemble and unify ---------------------------------------------
    wh_template = FeatStruct(
        whType=FeatStruct(f=Variable('?f'), type=Variable('?type')))
    query = FeatStruct(vp=vp_template, np=np_template, wh=wh_template)
    template = FeatStruct(gap=Variable('?gap'),
                          sem=FeatStruct(query=query),
                          var=Variable('?a'))
    supplied = FeatStruct(gap=gapUp, sem=semUp, var=varUp)
    return supplied.unify(template)
コード例 #31
0
ファイル: factors.py プロジェクト: HioLeong/IrnBru
def get_trigram_featstruct(trigram_tuple):
    """Return a FeatStruct holding items 0, 1 and 2 of *trigram_tuple* as x, y, z."""
    first = trigram_tuple[0]
    second = trigram_tuple[1]
    third = trigram_tuple[2]
    return FeatStruct(x=first, y=second, z=third)
コード例 #32
0
def analyze_template(s):
    """Parse template text into two dictionaries of feature structures.

    Blank lines and lines starting with ';' are skipped, and everything from
    the first ';' onwards on a line is discarded as a comment.  Two kinds of
    line are recognised; both must end with '!':

    * ``@name f1, f2, ...`` defines a named feature structure.  Each feature
      is either ``@other`` (unify with the already defined ``other``) or
      ``<path words> = value``, where ``value`` may itself be ``@other``.
      Empty features (e.g. from a trailing comma) are skipped.
    * ``#word node:<path words>=value, ...`` defines an entry whose feature
      structure holds one sub-structure per node type.

    The original notes describe the '@' names as the identifiers used by
    morph.flat and the '#' keys as those used by syntax-coded.flat.

    Returns:
        A tuple ``(feature_list, feature_list2)``.  The first dictionary maps
        each '@' name to its feature structure; the second maps the first
        token of each '#' line to its feature structure.

    Raises:
        TypeError: a line is not terminated by '!', an '@' line has no name,
            a left-hand side is not wrapped in ``<>``, or the line starts
            with neither '@' nor '#'.
        KeyError: a '#' identifier is defined twice, or ``= @name`` refers to
            a name that has not been defined.
        IndexError: a '#' line has no feature list, or one of its features
            lacks the ':' or '=' separator.
    """
    feature_list = {}
    feature_list2 = {}
    for line in s.splitlines():
        line = line.strip()
        if line == '' or line[0] == ';':
            continue

        # Strip a trailing comment.  The line cannot become empty here
        # because its first character is neither blank nor ';'.
        index = line.find(';')
        if index != -1:
            line = line[:index].strip()

        if line[0] == '@':
            if line[-1] != '!':
                raise TypeError("Each line should be terminated with a '!'")
            # Split once: the name, then the comma-separated feature list.
            parts = line[1:-1].split(None, 1)
            if not parts:
                raise TypeError('Cannot recognize line: %s.' % (line))
            name = parts[0]
            remainder = parts[1] if len(parts) > 1 else ''

            fs = FeatStruct()
            for f in remainder.split(','):
                f = f.strip()
                if not f:
                    continue    # nothing between two commas / no features
                index = f.find('=')
                if f[0] == '@' and f[1:] in feature_list:
                    # unify() does not change in-place
                    fs = fs.unify(feature_list[f[1:]])
                elif index != -1:
                    lhs = f[:index].strip()
                    rhs = f[index + 1:].strip()
                    if rhs.startswith('@'):    # rhs can also be a reference
                        rhs = feature_list[rhs[1:]]
                    if not (lhs.startswith('<') and lhs.endswith('>')):
                        raise TypeError('The left hand side of a feature structure must be wrapped with <>')
                    path = lhs[1:-1].split()
                    fs = fs.unify(make_fs(path, rhs))
                # Anything else (e.g. an unknown bare '@ref') is ignored.
            feature_list[name] = fs
        elif line[0] == '#':
            if line[-1] != '!':
                raise TypeError('Invalid input line, must be terminated by "!"')
            # Split once on whitespace: identifier, then the feature list.
            tokens = line[1:-1].split(None, 1)
            word_pos = tokens[0].strip()

            new_fs = FeatStruct()
            for entry in tokens[1].split(','):
                pieces = entry.split(':', 1)
                node_type = pieces[0].strip()
                pieces = pieces[1].split('=', 1)
                path = pieces[0].strip()[1:-1].split()    # remove <>
                rhs = pieces[1].strip()
                if node_type in new_fs:
                    new_fs[node_type] = new_fs[node_type].unify(make_fs(path, rhs))
                else:
                    new_fs[node_type] = make_fs(path, rhs)
            if word_pos in feature_list2:
                raise KeyError('Duplicate defitinion detected: %s.' % (word_pos))
            feature_list2[word_pos] = new_fs
        else:
            raise TypeError('Cannot recognize line: %s.' % (line))
    return (feature_list, feature_list2)
コード例 #33
0
    pred: FeatStruct, arg_0: Union[List[FeatStruct], FeatStruct], arg_1: Union[List[FeatStruct], FeatStruct]
) -> FeatStruct:
    """
    Construct feature structure than represent fact. 
    ex: know(Gael, [Bas, Justine]), return a feature structure of the type :
        [pred=[sem=know],
         arg0 = [head=Gael, tail=na],
         arg1 = [head=Bas, tail=[head=Justine, tail=na]]]
    """
    arg_0 = arg_0 if isinstance(arg_0, list) else [arg_0]
    arg_1 = arg_1 if isinstance(arg_1, list) else [arg_1]
    return FeatStruct(arg0=format_list(arg_0), arg1=format_list(arg_1), pred=pred)


# Predicate feature structures, one per supported predicate.
know, know_r, member = (
    FeatStruct(sem=label) for label in ("know", "know_r", "member")
)

# Entities that make up the input data.
Gael = FeatStruct(
    proper="Gael",
    is_proper=True,
    explicit=True,
    is_noun=False,
    gender="male",
    form="singular",
)
Bas = FeatStruct(
    proper="Bas",
    is_proper=True,
    explicit=True,
    is_noun=False,
    gender="male",
    form="singular",
)
Justine = FeatStruct(
    proper="Justine",
    is_proper=True,
    explicit=True,
    is_noun=False,
    gender="female",
    form="singular",
)
club = FeatStruct(
    proper="na",
    is_proper=False,
    is_noun=True,
    explicit=True,
    noun="tennis_club",
    gender="neuter",
    form="singular",
)

# Rules derived from the entities above, in the same order,
# e.g. ProperName[proper=Bas] -> "Bas".
dynamic_productions = list(map(entity_specific_rule, (Gael, Bas, Justine, club)))
コード例 #34
0
def mainLogic(doc):
    """Build, print and return the query feature structure for *doc*.

    The function inspects *doc* through the helpers ``subtree_matcher``,
    ``searchChild`` and ``checkHead``, sets a group of flags and slot values
    from what they report, assembles ``vp`` / ``np`` / ``wh`` feature
    structures from those values and passes everything to ``featStruct``.

    A 'det' match with text 'nào' selects the WHICH1 query type (gap 'f2');
    otherwise the query type is HOWLONG1 (gap 'r2').

    Args:
        doc: the parsed sentence handed to the helper functions.
            NOTE(review): its concrete type is not visible here.

    Returns:
        Whatever ``featStruct`` returns; the value is also printed.
    """
    departFlag = False
    departVpFlag = False
    arriveFlag = False
    sourceVpFlag = False
    destVpFlag = False
    busNameNpFlag = False
    destNpFlag = False
    t = ''
    a = ''
    time = ''
    nameDepart = ''
    nameArrive = ''
    bVar = ''
    h_BusName = ''
    busName = ''
    cityTokenText = ['Hồ_Chí_Minh', 'Hà_Nội', 'Huế', 'Đà_nẵng', 'Đà_Nẵng']
    busTokenText = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8']
    cityTokenDep = ['compound', 'nmod', 'obl']

    if subtree_matcher(doc, 'det', text='nào') != []:
        f, typeWh = 'f2', 'WHICH1'
    else:
        f, typeWh = 'h1', 'HOWLONG1'

    # WHICH1 gaps on the wh-variable itself; HOWLONG1 gaps on the runtime.
    gap = f if typeWh == 'WHICH1' else 'r2'

    if gap == 'f2':
        if (subtree_matcher(doc, 'case', text='đến') != []
                or subtree_matcher(doc, 'ccomp', text='đến') != []):
            arriveFlag = True
            a = 'a3'
            # Best effort: the first ROOT child whose text contains 'HR',
            # or '' when there is none or the lookup fails.
            try:
                time = [
                    i.text for i in searchChild(doc, 'ROOT') if 'HR' in i.text
                ][0]
            except Exception:
                time = ''
            t = 't2' if time != '' else '?t'

            if subtree_matcher(doc, 'ROOT', text='đi') != []:
                departFlag = True
                for cT in cityTokenText:
                    for cD in cityTokenDep:
                        temp = subtree_matcher(doc, cD, cT)
                        tempHead = checkHead(doc, temp)
                        try:
                            tempChild = [i.text for i in searchChild(doc, cD)]
                        except Exception:
                            tempChild = ''

                        if temp == []:
                            continue
                        if tempHead != 'đi':
                            destVpFlag = True
                            nameArrive = temp
                        elif 'từ' in tempChild:
                            sourceVpFlag = True
                            nameDepart = temp
                        elif 'đến' in tempChild:
                            destVpFlag = True
                            nameArrive = temp
            else:
                # Take the first (city, dependency) pair that matches and
                # stop searching altogether.
                for cT in cityTokenText:
                    for cD in cityTokenDep:
                        temp = subtree_matcher(doc, cD, cT)
                        if temp != []:
                            destNpFlag = True
                            nameArrive = temp
                            break
                    else:
                        # Inner loop ran to completion: try the next city.
                        continue
                    # Inner loop hit a match: leave the outer loop too.
                    break
        elif subtree_matcher(doc, 'ROOT', text='xuất_phát') != []:
            departFlag = True
            for cT in cityTokenText:
                for cD in cityTokenDep:
                    temp = subtree_matcher(doc, cD, cT)
                    tempHead = checkHead(doc, temp)
                    if (temp != []) and (tempHead != 'xuất_phát'):
                        departVpFlag = True
                        nameDepart = temp
    elif gap == 'r2':
        if subtree_matcher(doc, 'ROOT', text='đến') != []:
            arriveFlag = True
            a = 'a3'

            time = '?time'
            t = '?t'

            # A single 'obj' match is used directly; otherwise the first
            # match that is a known city name is picked, if there is one.
            listObj = subtree_matcher(doc, 'obj')
            nameArrive = listObj[0] if len(listObj) == 1 else listObj
            if isinstance(nameArrive, list):
                for obj in nameArrive:
                    if obj in cityTokenText:
                        nameArrive = obj
                        break

            if nameArrive != '':
                destVpFlag = True

            if subtree_matcher(doc, 'case', text='từ') != []:
                nameDepart = checkHead(doc, 'từ')
                if nameDepart != '':
                    # The original line was the bare expression
                    # `sourceVpFlag`, which had no effect; the flag is meant
                    # to be raised once a departure name is found.
                    sourceVpFlag = True

            # Bus name: look among the 'obj' matches first, then fall back
            # to the 'compound' matches.  The last hit wins.
            for sub in listObj:
                if sub in busTokenText:
                    busName = sub

            if busName == '':
                for sub in subtree_matcher(doc, 'compound'):
                    if sub in busTokenText:
                        busName = sub

            if busName != '':
                busNameNpFlag = True
                bVar = 'f2'
                h_BusName = 'h3'

    if arriveFlag and not (destVpFlag) and not (sourceVpFlag):
        vp = FeatStruct(
            arrive=FeatStruct(a=a, f=f, t=FeatStruct(t_var=t, time_var=time)))
    elif departFlag and departVpFlag:
        vp = FeatStruct(
            depart=FeatStruct(d='d3',
                              f='f1',
                              t=FeatStruct(t_var=t, time_var=time)),
            source=FeatStruct(bus='h3',
                              sourceName=FeatStruct(f=Variable('?h'),
                                                    name=nameDepart)))
    else:
        vp = FeatStruct(
            depart=FeatStruct(d='d3',
                              f='f1',
                              t=FeatStruct(t_var=t, time_var=time)),
            source=FeatStruct(bus='h4',
                              sourceName=FeatStruct(f=Variable('?h'),
                                                    name=nameDepart)),
            arrive=FeatStruct(a='a3',
                              f='f2',
                              t=FeatStruct(t_var=t, time_var=time)),
            dest=FeatStruct(destName=FeatStruct(
                f=Variable('?f'), name=FeatStruct(h='h6', name=nameArrive))))

    if destNpFlag and not (busNameNpFlag):
        np = FeatStruct(dest=FeatStruct(
            bus=Variable('?f'),
            dest=FeatStruct(f=Variable('?f'),
                            name=FeatStruct(h='h3', name=nameArrive))))
    else:
        np = FeatStruct(the=FeatStruct(
            bus=bVar, busname=FeatStruct(h=h_BusName, name=busName)))

    wh = FeatStruct(whType=FeatStruct(f=f, type=typeWh))
    sem = FeatStruct(query=FeatStruct(vp=vp, np=np, wh=wh))
    var = Variable('?a')

    result = featStruct(gap,
                        sem,
                        var,
                        arriveFlag=arriveFlag,
                        destVpFlag=destVpFlag,
                        sourceVpFlag=sourceVpFlag,
                        busNameNpFlag=busNameNpFlag,
                        destNpFlag=destNpFlag,
                        departFlag=departFlag,
                        departVpFlag=departVpFlag)
    print(result)
    return result
コード例 #35
0
ファイル: LWFGParser.py プロジェクト: gauravahuja/LWFGParser
    def _applyConstraints(self, parent, child):
        """Apply the compositional constraints between ``parent`` and ``child``.

        Constraint step: given a satisfied state ``child`` and the updated
        state ``parent``, compositional constraints are applied in the form of
        unification between the semantic heads of the two states.  A
        successful unification implies the two states are compatible given
        the compositional constraints, in which case the semantic bodies are
        merged and updated to reflect the unification.  Lastly, the string of
        ``parent`` is updated.

        Because unification is a tricky process, care needs to be taken to
        ensure that it is properly performed.  Variables that have the same
        name are assumed to be the same, but because the grammar generates
        feature structures for compositional constraints using a small set of
        variable names, variables can often have the same name despite being
        independent entities.  The workaround is to rename the variables in
        one of the semantic heads before unification; the renamed variables
        must be updated in the semantic bodies as well.

        Args:
            parent: the state being updated; its ``head``, ``body``,
                ``string`` and ``dotIdx`` attributes are read, and ``head``,
                ``body`` and ``string`` are reassigned on success.
            child: the satisfied state; its ``prod``, ``head``, ``body`` and
                ``string`` attributes are read.

        Returns:
            A two-element list ``[ok, parent]``.  ``ok`` is True when the
            child is a terminal (nothing to do) or unification succeeded, and
            False when the feature sets do not line up or unification failed.
        """
        # A child that is a special lexical state contains no semantic info,
        # so there is nothing to unify: skip.
        if LWFG.is_terminal(child.prod.lhs()):
            return [True, parent]

        ###################################################
        # Unification of Semantic Heads
        ###################################################

        # The compositional constraints are represented so that there is a
        # separate set of constraints for each term in the production rule.
        # Each set of constraints is a separate feature structure.
        # The set of constraints for the LHS is indexed with the feature
        # identifier `h'; the set for the ith term in the RHS is indexed with
        # the feature identifier `hi'.
        #
        # Thus, when unification is performed, the constraints of the LHS of
        # the child state (indexed by `h') need to be retrieved.
        # Also, because NLTK unifies embedded feature structures only when
        # their feature identifiers are the same, the feature id of the
        # retrieved set of constraints must match the corresponding feature id
        # in the parent state.  That is `hi', where i is the index of the
        # child's LHS in the parent's RHS.
        hidx = 'h'+str(parent.dotIdx)
        childHead = FS()  # make a new Feature Structure to get around the copy-by-ref issues
        childHead[hidx] = child.head['h']

        parentHead = parent.head

        # Rename variables in childHead to avoid confusion.
        # Step 0: check whether child.body and parent.body share variable
        # names and pick a fresh name for any that are shared.  usedVars
        # collects every name that must not be reused in step 2.
        renamedVarMap1 = {}
        usedVars = []
        if parent.body:
            pBodVars = parent.body.variables()
            cBodVars = child.body.variables()
            for v in cBodVars:
                if v in pBodVars: # change v
                    nv = self._newVarName(v)
                    # Keep generating until the name clashes with neither body.
                    while nv in pBodVars+cBodVars:
                        nv = self._newVarName(nv)
                    usedVars.append(Variable(nv))
                    renamedVarMap1[Variable(v)] = Variable(nv)
                else: # not shared by parent
                    usedVars.append(Variable(v))
            for v in pBodVars:
                usedVars.append(Variable(v))
        else:
            for v in child.body.variables():
                usedVars.append(Variable(v))

        # Step 1: find used variables from the parent's semantic head.
        usedVars += list(parentHead.variables())

        # Step 2: rename variables in the child's semantic head; the renames
        # that were made are recorded in renamedVarMap2.
        renamedVarMap2 = {}
        childHead = childHead.rename_variables(used_vars=usedVars, new_vars=renamedVarMap2)

        # Check that the features align with each other: if the relevant set
        # of features of the parent state is **not** a subset of those of the
        # child state, the states are incompatible.
        childFeats = set(childHead[hidx].keys())
        parentFeats = set(parentHead[hidx].keys())
        if not (parentFeats <= childFeats):
            return [False, parent]

        # Perform unification; `bindings` is handed to unify() so the
        # variable bindings it makes can be applied to the bodies below.
        bindings = {}
        parentHead = parentHead.unify(childHead, bindings)
        if not parentHead: # failed to unify
            return [False, parent] # the states are incompatible

        # Store the unified head on the parent state.
        parent.head = parentHead

        ###################################################
        # Updating Semantic Bodies
        ###################################################
        childBody = LWFG.OntoSeR(str(child.body)) # create new sem body to get around copy-by-ref issues

        # The next part is a bit confusing and needs spelling out.
        # We want to change variable names in the child's semantic body that
        # are shared with the parent's semantic body.
        # However, we can't just do it willy-nilly: only those variable names
        # that are not linked with the parent's body may be changed.
        # For example, in "the smart girl" -- when "n -> det n" is completed,
        # the body corresponding to "the" is linked with the body
        # corresponding to "smart girl".
        # If we change variable names without consideration, that link is lost.
        #
        # To maintain the link, a roundabout method is employed.
        # We first look at all renamed names in "renamedVarMap2" -- which
        # results from changing vars in the child's semantic head.
        # We then check whether each renamed name was involved in unification
        # with the parent's head, by looking for it among the keys of the
        # unification "bindings".
        # If it was, we check whether it was bound to a variable in the
        # parent's head that we were about to rename, i.e. whether the bound
        # variable is a key in "renamedVarMap1" -- which we got from renaming
        # variable names in the child's body that were present in the parent's
        # body.  Such entries are dropped so the link survives.

        for var in renamedVarMap2.keys():
            if renamedVarMap2[var] in bindings.keys() and bindings[renamedVarMap2[var]] in renamedVarMap1.keys():
                renamedVarMap1.pop(bindings[renamedVarMap2[var]])

        childBody.substituteBindings(renamedVarMap2)
        childBody.substituteBindings(renamedVarMap1)

        if not parent.body:
            # The parent has no body yet: the child's body becomes its body.
            parentBody = childBody
        else:
            # Otherwise join the two bodies with a comma.
            parentBody = LWFG.OntoSeR(str(parent.body) + ',' + str(childBody))
        parentBody.substituteBindings(bindings)
        parent.body = parentBody

        ###################################################
        # Updating Strings
        ###################################################
        if not parent.string:
            parentString = child.string
        else:
            parentString = parent.string + ' ' + child.string
        parent.string = parentString

        return [True, parent]