def addNonTerminal(nodes,node):
    # Builds a symbol name for `node` from its 'Cat' attribute, appending
    # '_<Rule>' and '_<ClType>' when those attributes are present, and asks
    # whichNode() for the position of that name in `nodes`.
    # NOTE(review): this excerpt looks truncated -- `children` is filled in
    # but never used or returned in the visible lines.
    name=str(node.get('Cat')) + ('_'+ str(node.get('Rule')) if node.get('Rule')!=None else '') + ('_' + str(node.get('ClType')) if node.get('ClType')!=None else '')
    nodeNumber = whichNode(name,nodes)
    # -1 presumably means "name not found in nodes" -- confirm against whichNode().
    if nodeNumber == -1:
        children = []
        for child in node.getchildren():
            # Children carrying a 'Unicode' attribute get a '{Cat_features}'
            # symbol built from their morphological attributes.
            if 'Unicode' in child.keys():
                features = FeatStruct()
                for key in child.keys():
                    if key in ['Person','Tense','Voice','Mood','Case','Number','Gender','Degree']:
                        features = features.unify(FeatStruct('['+str(key)+'=\''+str(child.get(key))+'\']'))
                children.append('{' + str(child.get('Cat')) + '_' + stringFeatures(features) + '}')
                # NOTE(review): as written this second append runs in the same
                # branch as the one above; an `else:` may have been lost -- confirm.
                children.append(str(child.get('Cat')) + ('_'+ str(child.get('Rule')) if child.get('Rule')!=None else '') + ('_' + str(child.get('ClType')) if child.get('ClType')!=None else ''))
Beispiel #2
def get_instance_featstructs(root):
    # Returns one FeatStruct per instance found under `root`, built from every
    # attribute of the instance except 'eventID' (see the final comprehension).
    # NOTE(review): the loop body below is corrupted in this excerpt -- the
    # `try:` line, the head of each statement and the closing `]` of the
    # returned list are missing. Restore them from the upstream source before
    # relying on this function.
    instances = get_instances(root)
    events = get_events(root)
    for i in instances:
        # Presumably merges 'class'/'stem'/'text' of the matching event
        # (event 'eid' == instance 'eventID') into the instance -- TODO confirm.
                        **{k: e.attrib[k]
                           for k in ['class', 'stem']},
                            'text': e.text
                    }) for e in events if e.get('eid') == i.get('eventID')))
        # Fallback for when the lookup above raises KeyError: only 'class'
        # is read here, 'stem' is not.
        except KeyError:
                        **{k: e.attrib[k]
                           for k in ['class']},
                            'text': e.text
                    }) for e in events if e.get('eid') == i.get('eventID')))
    return [
        FeatStruct({k: i.attrib[k]
                    for k in set(i.keys()) - set(['eventID'])})
        for i in instances
def smartUnify(*featstructs):
    '''Unify two or more feature structures on the features they share.

    Only features present in *every* argument are kept. For example,
    [person=1,number=2] and [case=3,number=2] give [number=2], not
    [person=1,number=2,case=3] as FeatStruct.unify() would.

    *featstructs -- Any number of feature structures to unify.

    Returns the unified FeatStruct, an empty FeatStruct when called with no
    arguments, or None when a shared feature has conflicting values.
    '''
    if not featstructs:
        # Nothing to intersect; avoids indexing an empty list below.
        return FeatStruct()

    # Collect the feature names of each structure, then keep only the names
    # that occur in all of them.
    key_lists = [list(struct) for struct in featstructs]
    shared = set(key_lists[0])
    for keys in key_lists[1:]:
        shared = shared.intersection(keys)

    # Unify the shared features one by one; unify() returns None on a
    # value conflict, which aborts the whole operation.
    result = FeatStruct()
    for struct in featstructs:
        for key in struct:
            if key in shared:
                result = result.unify(
                    FeatStruct('[' + str(key) + '=' + str(struct[key]) + ']'))
                if result is None:
                    return None
    return result
Beispiel #4
def debug_test_contain():
    """Print the result of test_contain() on two small sample leaf structures.

    fs100 holds one '__or_' entry and fs101 holds that entry plus one more.
    """
    fs100 = FeatStruct()
    fs100['__or_wzq'] = 'wzq'
    #fs100['__or_qwe'] = 'qwe'
    fs101 = FeatStruct()
    fs101['__or_123'] = '123'
    fs101['__or_wzq'] = 'wzq'
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(test_contain(fs100, fs101))
def addTerminal(nodes,node):
    # Collects the morphological attributes of `node` into a FeatStruct, builds
    # the symbol name '{<Cat>_<features>}' and asks whichNode() for its position
    # in `nodes`.
    # NOTE(review): the excerpt ends right after the `if` below; the branch
    # body is not visible here.
    features = FeatStruct()
    for key in node.keys():
        if key in ['Person','Tense','Voice','Mood','Case','Number','Gender','Degree']:
            features = features.unify(FeatStruct('['+str(key)+'=\''+str(node.get(key))+'\']'))
    name = '{' + str(node.get('Cat')) + '_' + stringFeatures(features) + '}'
    nodeNumber = whichNode(name,nodes)
    # -1 presumably means "name not found in nodes" -- confirm against whichNode().
    if nodeNumber == -1:
Beispiel #6
def match_feature(feature, regexp, operation=0):
    """
    match_feature(feature,regexp,operation=0) -> FeatStruct

    Filter a feature structure with a regular expression. The structure is
    expected in the XTAG form, i.e. with a '__value__' entry at the last
    level of indexing. The expression is matched against feature names.

    feature: The feature structure to filter.

    regexp: A regular expression accepted by the `re` module.

    operation: 0 for positive filtering (keep entries whose name matches),
    1 for negative filtering (keep entries whose name does not match).

    Returns the filtered FeatStruct, or None when nothing was retained.
    """
    # Local import so the block does not depend on a file-level `import re`.
    import re

    new_feature = FeatStruct()
    count = 0
    for key in feature.keys():
        val = feature[key]
        matched = re.search(regexp, key) is not None
        if '__value__' in val:
            # Leaf entry: keep or drop it according to the filter direction.
            if (operation == 0 and matched) or (operation == 1 and not matched):
                new_feature[key] = val
                count += 1
        elif operation == 0 and matched:
            # Matching inner node under positive filtering: keep the subtree.
            new_feature[key] = val
            count += 1
        elif operation == 1 and not matched:
            # Non-matching inner node under negative filtering: keep the node
            # itself and filter its children; an emptied subtree stays as an
            # empty FeatStruct.
            ret = match_feature(val, regexp, operation)
            new_feature[key] = ret if ret is not None else FeatStruct()
            count += 1
        elif operation == 1 and matched:
            # Matching inner node under negative filtering: keep it only if
            # some descendant survives the filter.
            ret = match_feature(val, regexp, operation)
            if ret is not None:
                new_feature[key] = ret
                count += 1

    if count == 0:
        return None
    return new_feature
Beispiel #7
 def topic_features(self, article):
     """Return a ``{word: True}`` mapping for the words of ``article``.

     The words come from ``self.get_word_occurence(article)``; each one
     becomes a key with the value ``True``.
     """
     word_occurence = self.get_word_occurence(article)
     # The FeatStruct objects built here previously were never used.
     return {word: True for word in word_occurence}
def make_fs(lhs,rhs):
    """Build a nested feature structure from a path and a value.

    lhs: list of feature names, outermost first.
    rhs: the value stored at the end of the path; it is wrapped in a
    FeatStruct under the key '__value__'.

    e.g. lhs = ['a','b','c','d'] and rhs = 'wzq' gives
    [a = [b = [c = [d = [__value__ = 'wzq']]]]]
    """
    new_fs = FeatStruct()
    if len(lhs) == 1:
        inner = FeatStruct()
        inner['__value__'] = rhs
        new_fs[lhs[0]] = inner
    else:
        # Without this branch the recursion ran on every call and ended in
        # an IndexError on the empty remainder of the path.
        new_fs[lhs[0]] = make_fs(lhs[1:],rhs)
    return new_fs
Beispiel #9
def nested_frozen_fs(dictionary):
    """Convert a (possibly nested) mapping into a FeatStruct.

    A FeatStruct argument is returned unchanged. Otherwise a new FeatStruct
    is filled from the mapping's items: set values become frozensets, dict
    values are converted recursively, everything else is stored as-is.
    """
    if isinstance(dictionary, FeatStruct):
        return dictionary

    converted = FeatStruct()
    for key, value in dictionary.items():
        if isinstance(value, set):
            converted[key] = frozenset(value)
        elif isinstance(value, dict):
            converted[key] = nested_frozen_fs(value)
        else:
            converted[key] = value
    return converted
Beispiel #10
def make_rhs_using_or(rhs):
    """
    :param rhs: The right hand string which may contain the 'or' relationship
    :type rhs: str

    :return: A feature structure using '__or_' structure
    :rtype: FeatStruct

    Build the feature structure that represents an 'or' relationship in the
    xtag grammar. rhs must be a string; each '/'-separated alternative is
    stored under a key derived from it by make_leaf_str().

    For example,

    rhs = a/b/c ->  [ __or_a = a ]
                    [ __or_b = b ]
                    [ __or_c = c ]
    """
    new_fs = FeatStruct()
    # split() already yields a one-element list when there is no '/', so no
    # separate single-value branch is needed.
    for alternative in rhs.split('/'):
        new_fs[make_leaf_str(alternative)] = alternative
    return new_fs
def fourth_pass(xtag_trees):
    """
    fourth_pass() -> list

    Given the result of third_pass(), build a feature structure dictionary
    for every entry and store it in slot 4 of that entry.

    Only feature equations whose right-hand side contains no ':' (i.e. plain
    values) are handled here; each value is wrapped in a FeatStruct under
    the key '__value__'. The same list is returned after being updated
    in place.
    """
    for xtag_entry in xtag_trees:
        features = {}
        for feature_entry in xtag_entry[1]:
            lhs = feature_entry[0]
            rhs = feature_entry[1]
            l_separator = lhs.find(':')
            r_separator = rhs.find(':')

            if r_separator == -1:
                l_id = lhs[:l_separator]
                l_space = lhs.find(' ')

                feat_rhs = FeatStruct()
                feat_rhs["__value__"] = rhs

                if l_space == -1:
                    # Single feature name after the separator.
                    l_feature = lhs[l_separator + 2:-1]
                    add_two_feature(features, l_id, feat_rhs, l_feature)
                else:
                    # Two space-separated feature names form a nested path.
                    l_feature1 = lhs[l_separator + 2:l_space]
                    l_feature2 = lhs[l_space + 1:-1]
                    add_two_feature(features, l_id, feat_rhs, l_feature1,
                                    l_feature2)

        xtag_entry[4] = features

    return xtag_trees
Beispiel #12
def _naive_unify(fstruct1:FeatStruct, fstruct2:FeatStruct):
    """Merge two feature values without failing on conflicts.

    Two mappings are merged feature by feature: features present in both
    are unified recursively, features only in fstruct2 are copied over.
    Two sequences are concatenated and any Variable items are dropped from
    the result. Any other combination yields None.

    The result is built on a shallow copy of fstruct1.
    """
    newfs = copy.copy(fstruct1)

    if _is_mapping(fstruct1) and _is_mapping(fstruct2):
        # Sorting fstruct2's features isn't actually necessary; it is done
        # to give deterministic behavior, e.g. for tracing.
        for fname, fval2 in sorted(fstruct2.items()):
            if fname in fstruct1:
                newfs[fname] = _naive_unify(fstruct1[fname], fval2)
            else:
                # Previously this assignment ran unconditionally and
                # overwrote the unified value computed above.
                newfs[fname] = fval2
        return newfs  # Contains the unified value.

    if _is_sequence(fstruct1) and _is_sequence(fstruct2):
        # Concatenate the values; corresponding items are NOT unified.
        newfs += fstruct2
        newfs = tuple([t for t in newfs if not isinstance(t, Variable)])
        return newfs  # Contains the unified value.

    return None
Beispiel #13
def remove_or_tag(feature):
    """
    :param feature: The feature structure that you want to remove the '__or_' tag
    :type feature: FeatStruct

    :return: A new feature structure with "__or_" removed and combined
    :rtype: FeatStruct

    Given a feature structure in the internal representation of the xtag
    system (each leaf is wrapped in a feature struct keyed '__or_' + value),
    return a structure without the '__or_' level, where the alternatives of
    a leaf are joined as value1/value2/...

    e.g. for fs = [apple = [__or_a = 'a']]
                  [        [__or_b = 'b']]
                  [        [__or_c = 'c']]
    remove_or_tag(fs) will return:

      fs_return = [apple = 'a/b/c']
    """
    new_feature = FeatStruct()
    for key in feature.keys():
        entry = feature[key]
        if test_leaf(entry) == True:
            # list() keeps the indexing below working when keys() is a view.
            entry_keys = list(entry.keys())
            str_or_removed = entry[entry_keys[0]]
            for alt_key in entry_keys[1:]:
                str_or_removed += '/' + entry[alt_key]
            new_feature[key] = str_or_removed
        else:
            # Inner node: strip the tags from the subtree instead.
            new_feature[key] = remove_or_tag(entry)
    return new_feature
def find_probability(list1,list2,targword):
    # NOTE(review): this excerpt is corrupted -- the innermost loop has no
    # body, the `with` statement has lost its open(...) call and the code
    # that trains LinearSVC_classifier is not visible. Restore the missing
    # lines from the upstream source before using this function.
    for i in range(len(list1)):
        for j in range(len(list1[i])):
            for k in range(len(list1[i][j])):

    # Presumably opens a UTF-16 'Senses00<sensetag>.txt' file -- TODO confirm.
    with + '/Senses00' + str(sensetag) + '.txt', encoding='utf-16') as f:


    #print(sense_word_list[sense_word_list.index('&'):sense_word_list.index('!') ])
        LinearSVC_classifier = SklearnClassifier(LinearSVC())
        # Reports the classifier's accuracy measured on list2.
        print("LinearSVC_classifier accuracy percent:", (nltk.classify.accuracy(LinearSVC_classifier, list2)) * 100)
        #classifier=nltk.NaiveBayesClassifier.train(FeatStruct('["a","b" , "c"]'))
        print("Classifier accuracy percent :", "{0:.3f}".format(value((FeatStruct('[1,2,3]')))))
Beispiel #15
def remove_value_tag(feature):
    """Return a copy of ``feature`` with the '__value__' wrappers removed.

    Every entry that holds a '__value__' key is replaced by that value;
    every other entry is processed recursively.
    """
    new_feature = FeatStruct()
    for key in feature.keys():
        entry = feature[key]
        if '__value__' in entry:
            new_feature[key] = entry['__value__']
        else:
            # Previously this recursion also ran for leaves and overwrote
            # the unwrapped value.
            new_feature[key] = remove_value_tag(entry)
    return new_feature
Beispiel #16
def test_contain(fs1, fs2):
    """
    Test if one feature structure contains another, i.e. is the super set of another.

    :param fs1: The first feature structure you want to test
    :type fs1: FeatStruct
    :param fs2: The second feature structure you want to test
    :type fs2: FeatStruct
    :return: 0 if they are equal to each other
             1 if fs1 is a subset of fs2
            -1 if fs2 is a subset of fs1
    FeatStruct if there is some intersection
          None if there is no intersection
    :rtype: integer/FeatStruct/None
    :raises ValueError: if either argument is not a leaf node

    Both arguments must be leaf nodes. In a leaf node the left hand side is
    derivable from the right hand side, so only the keys are compared.

    NOTE(review): earlier documentation said two empty structures compare
    as equal, but the code returns None for them (no shared keys) -- confirm
    which one is intended.
    """
    if test_leaf(fs1) == False or test_leaf(fs2) == False:
        raise ValueError('Two arguments must be leaf nodes.')
    key_1 = fs1.keys()
    key_2 = fs2.keys()
    # Keys present in both structures, in fs1's order.
    common = [k for k in key_1 if k in key_2]
    len_1 = len(key_1)
    len_2 = len(key_2)
    common_len = len(common)

    if common_len == 0:
        return None
    if len_1 != common_len and len_2 != common_len:
        # Partial overlap: return just the shared entries.
        ret = FeatStruct()
        for k in common:
            ret[k] = fs1[k]
        return ret
    if len_1 == common_len and len_2 != common_len:
        return 1  # every key of fs1 is in fs2
    if len_1 != common_len and len_2 == common_len:
        return -1  # every key of fs2 is in fs1
    return 0  # same key sets, so they are equal
def add_two_feature(features, l_id, rhs, l_feature1, l_feature2=None):
    """Store ``rhs`` under ``features[l_id]`` at a one- or two-level path.

    features: mapping from node id to feature structure; updated in place.
    l_id: key of the node whose features are extended.
    rhs: value to store.
    l_feature1: first feature name.
    l_feature2: optional second feature name; when given, the value goes to
    features[l_id][l_feature1][l_feature2].

    Missing intermediate levels are created as empty FeatStructs; existing
    ones are kept, so earlier entries are not lost.
    """
    if l_id not in features:
        features[l_id] = FeatStruct()
    node = features[l_id]

    if l_feature2 is None:
        node[l_feature1] = rhs
        return

    if l_feature1 not in node:
        node[l_feature1] = FeatStruct()
    node[l_feature1][l_feature2] = rhs
def fifth_pass(xtag_trees):
    """
    fifth_pass() -> list

    Given the result of fourth_pass(), continue building the feature
    structures stored in slot 4 of every entry. This pass handles the
    equations whose right-hand side refers to another feature (it contains
    a ':'): the referenced feature is created with an empty '__value__' if
    it does not exist yet, and the left-hand side is then made to point at
    that same object.

    The same list is returned after being updated in place.
    """
    for xtag_entry in xtag_trees:
        features = xtag_entry[4]
        for feature_entry in xtag_entry[1]:
            lhs = feature_entry[0]
            rhs = feature_entry[1]
            l_separator = lhs.find(':')
            r_separator = rhs.find(':')

            if r_separator != -1:
                l_id = lhs[:l_separator]
                r_id = rhs[:r_separator]
                r_feature = rhs[r_separator + 2:-1]
                print(r_feature)
                l_space = lhs.find(' ')

                # Make sure features[r_id][r_feature] exists so that it can
                # be shared with the left-hand side.
                if r_id not in features:
                    features[r_id] = FeatStruct()
                    features[r_id][r_feature] = FeatStruct(__value__='')
                elif r_feature not in features[r_id]:
                    features[r_id][r_feature] = FeatStruct(__value__='')

                if l_space == -1:
                    l_feature = lhs[l_separator + 2:-1]
                    add_two_feature(features, l_id, features[r_id][r_feature],
                                    l_feature)
                else:
                    l_feature1 = lhs[l_separator + 2:l_space]
                    l_feature2 = lhs[l_space + 1:-1]
                    add_two_feature(features, l_id, features[r_id][r_feature],
                                    l_feature1, l_feature2)

    return xtag_trees
Beispiel #19
def create_fact(
    pred: FeatStruct, arg_0: Union[List[FeatStruct], FeatStruct], arg_1: Union[List[FeatStruct], FeatStruct]
) -> FeatStruct:
    """Construct the feature structure that represents a fact.

    Each argument may be a single feature structure or a list of them; a
    single one is treated as a one-element list. Both lists are converted
    with format_list() and stored under 'arg0' and 'arg1', next to 'pred'.

    ex: know(Gael, [Bas, Justine]) returns a feature structure of the type:
         [pred = know,
          arg0 = [head=Gael, tail=na],
          arg1 = [head=Bas, tail=[head=Justine, tail=na]]]
    (the head/tail shape is whatever format_list() produces -- see there).
    """
    arg_0 = arg_0 if isinstance(arg_0, list) else [arg_0]
    arg_1 = arg_1 if isinstance(arg_1, list) else [arg_1]
    return FeatStruct(arg0=format_list(arg_0), arg1=format_list(arg_1), pred=pred)
Beispiel #20
def parse_feature_in_catalog(s):
    """
    :param s: The string representing the start feature in the catalog file
    :type s: str
    :return: A feature structure which is the start feature
    :rtype: FeatStruct

    Parse the 'start-feature' string of the catalog file (i.e. english.gram)
    into a FeatStruct. The string is expected to look like this:

    <mode> = ind/imp <comp> = nil <wh> = <invlink>  <punct term> = per/qmark/excl <punct struct> = nil

    Tokens are separated by single spaces; no comma, period or semicolon is
    used. Features from other files use different notations and need their
    own parsers.
    """
    # token_list is a list of (LHS, RHS) tuples, one per feature definition,
    # e.g. [('mode','ind/imp'),('comp','nil')]
    token_list = []
    while True:
        equal_sign = s.find('=')
        if equal_sign == -1:
            break
        # The RHS is the text between '=' and the next '<'. If that text is
        # empty the RHS itself is written as <...>, so look one '<' further.
        angle_bracket = s.find('<', equal_sign)
        if angle_bracket == -1:
            rhs = s[equal_sign + 1:].strip()
        else:
            rhs = s[equal_sign + 1:angle_bracket].strip()
            if rhs == '':
                angle_bracket = s.find('<', angle_bracket + 1)
                if angle_bracket == -1:
                    rhs = s[equal_sign + 1:].strip()
                else:
                    rhs = s[equal_sign + 1:angle_bracket].strip()
        lhs = s[:equal_sign].strip()[1:-1]
        token_list.append((lhs, rhs))
        if angle_bracket == -1:
            # Last definition consumed. Slicing with -1 here would keep the
            # final character and could loop forever on a trailing '='.
            break
        s = s[angle_bracket:]

    new_fs = FeatStruct()
    for token in token_list:
        add_new_fs(new_fs, token[0].split(), token[1])

    return new_fs
Beispiel #21
def add_new_fs(fs, lhs, rhs, ref=0):
    """
    :param fs: The feature structure that we are going to add to.
    :type fs: FeatStruct
    :param lhs: The path defined for the new node
    :type lhs: list(str)
    :param rhs: The value of the node
    :type rhs: str / Any object
    :param ref: Controls whether rhs should be treated as a string or other object
    :type ref: 0 / 1
    :raises ValueError: if ref is neither 0 nor 1

    Add the node described by lhs and rhs to the existing structure fs, in
    place. With ref == 0, rhs must be a string and is converted with
    make_rhs_using_or(), so the lowest level is keyed '__or_' + value. With
    ref == 1, rhs is attached as-is (a reference, not a copy).

    Path components that already exist in fs are reused rather than
    replaced, so this function can be used to merge feature structures.

    For example,
    fs = [a = ['www']]
    lhs = ['a','b','c','d','e']
    rhs = 'wzq'
    [a = [['www']                                ]
    [    [b = [c = [d = [e = [__or_wzq = 'wzq']]]]
    """
    head = lhs[0]
    if len(lhs) == 1:
        if ref == 0:
            fs[head] = make_rhs_using_or(rhs)
        elif ref == 1:
            fs[head] = rhs
        else:
            raise ValueError('Undefined ref value %d' % (ref))
    else:
        # Create the intermediate level only when it is missing, so existing
        # siblings on the path are preserved.
        if head not in fs:
            fs[head] = FeatStruct()
        add_new_fs(fs[head], lhs[1:], rhs, ref)
Beispiel #22
def make_fs(lhs, rhs, ref=0):
    """
    :param lhs: The path on the left hand side
    :type lhs: list(str)
    :param rhs: The string on the right hand side / Any object
    :type rhs: str / object
    :param ref: Control whether to treat rhs as a string or as an arbitrary object
    :type ref: 0 / 1
    :return: A constructed feature structure
    :rtype: FeatStruct
    :raises ValueError: if ref is neither 0 nor 1

    Build a feature structure that has exactly the nested path given in lhs
    with rhs as the innermost value. With ref == 0, rhs must be a string and
    is converted with make_rhs_using_or() to handle the 'or' notation. With
    ref == 1, rhs is stored untouched.

    lhs = ['a','b','c','d']
    rhs = 'wzq'
    ->FeatStruct = [a = [b = [c = [d = [__or_wzq = 'wzq']]]]]
    """
    new_fs = FeatStruct()

    if len(lhs) == 1:
        if ref == 0:
            rhs = make_rhs_using_or(rhs)
        elif ref == 1:
            pass  # Store rhs as-is.
        else:
            raise ValueError('Undefined ref value %d' % (ref))
        new_fs[lhs[0]] = rhs
    else:
        new_fs[lhs[0]] = make_fs(lhs[1:], rhs, ref)  # Build the inner levels.

    return new_fs
Beispiel #23
def debug():
    """Print a sample nested structure and the result of filtering it.

    Builds four '__value__' leaves under 'nested' and one under 'single',
    prints the structure, then prints it after negative filtering with the
    pattern 'i' and removal of the '__value__' wrappers.
    """
    a = FeatStruct()
    b = FeatStruct()
    c = FeatStruct()
    d = FeatStruct()
    a['__value__'] = 'OKWANGZiqi'
    b['__value__'] = 'WANGYunpeng'
    c['__value__'] = "WWA!!!"
    d['__value__'] = 'WZQ(*&YTG'
    e = FeatStruct()
    e['first'] = a
    e['second'] = b
    e['third'] = c
    e['fourth'] = d
    f = FeatStruct()
    f['nested'] = e
    g = FeatStruct()
    g['__value__'] = "WAAAAAAAAH!"
    f['single'] = g
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(f)
    print('')
    print(remove_value_tag(match_feature(f, 'i', 1)))
Beispiel #24
def get_timex_featstructs(root):
    """Return one FeatStruct per timex under ``root``.

    The timex elements come from get_timexs(root); each FeatStruct is built
    from the element's attribute mapping.
    """
    featstructs = []
    for timex in get_timexs(root):
        featstructs.append(FeatStruct(timex.attrib))
    return featstructs
Beispiel #25
 def __hash__(self):
     """Return the hash computed by ``FeatStruct.__hash__`` for this object."""
     # Calls the FeatStruct implementation explicitly rather than via super().
     return FeatStruct.__hash__(self)
Beispiel #26
    path_1 = get_all_path(fs1)
    for i in path_1:
        item_2 = get_element_by_path(fs2, i)
        if item_2 == None:
            item_1 = get_element_by_path(fs1, i)
            add_new_fs(new_fs, i, item_1, 1)

    if tree1 != None:
        correct_other_nodes(correction_list, tree1)
    if tree2 != None:
        correct_other_nodes(correction_list, tree2)

    return new_fs

# Sample feature structures for manual testing:
#   fs1 = [apple = fs2, orange = [more = fs3]]
# where fs2 and fs3 are leaves holding '__or_' entries.
fs1 = FeatStruct()
fs2 = FeatStruct()
fs3 = FeatStruct()
fs4 = FeatStruct()
fs4['more'] = fs3
fs2['__or_a'] = 'a'
fs2['__or_wzq'] = 'wzq'
fs2['__or_qwe'] = 'qwe'
fs1['apple'] = fs2
fs1['orange'] = fs4
fs3['__or_zxcv'] = 'zxcv'
fs3['__or_4567'] = '4567'
# Start feature parsed from a catalog-style string; the call was missing
# its closing parenthesis.
debug_start_feature = parse_feature_in_catalog(
    '<mode> = ind/imp <comp> = nil <wh> = <invlink>  <punct term> = per/qmark/excl <punct struct> = nil'
)
empty_feature = FeatStruct()
Beispiel #27
 def __hash__(self):
     """Return the hash computed by ``FeatStruct.__hash__`` for this object."""
     # Calls the FeatStruct implementation explicitly rather than via super().
     return FeatStruct.__hash__(self)
class Terminal:
    """A terminal symbol: a name, its value and its feature structure."""

    def __init__(self, name, value, features):
        """Store ``name``, ``value`` and ``features`` on the instance."""
        # The assignment target of `name` was lost in the original line.
        self.name = name
        self.value = value
        self.features = features
# Script section: parse '65.xml' and walk every Node of every Tree of every
# Sentence.
# NOTE(review): this excerpt looks truncated -- `features`, `name` and
# `children` are computed but never stored in `nodes` in the visible lines.
nodes = []

tree = ET.parse('65.xml')
root = tree.getroot()

for sentence in root.iter('Sentence'):
    # NOTE: the loop variable below rebinds the module-level `tree`.
    for tree in sentence.iter('Tree'):
        for node in tree.iter('Node'):
            # Only nodes carrying a 'Unicode' attribute are processed here.
            if 'Unicode' in node.keys():
                # Gather the node's morphological attributes into one FeatStruct.
                features = FeatStruct()
                for key in node.keys():
                    if key in ['Person','Tense','Voice','Mood','Case','Number','Gender','Degree']:
                        features = features.unify(FeatStruct('['+str(key)+'=\''+str(node.get(key))+'\']'))
                # Symbol name: Cat, plus '_Rule' and '_ClType' when present.
                name = str(node.get('Cat')) + ('_'+ str(node.get('Rule')) if node.get('Rule')!=None else '') + ('_' + str(node.get('ClType')) if node.get('ClType')!=None else '')
                children = []
                for child in node.getchildren():
                    # child.get('Cat') is concatenated without str(), so a
                    # child lacking 'Cat' would raise a TypeError here.
                    children.append(child.get('Cat') + ('_'+ str(child.get('Rule')) if child.get('Rule')!=None else '') + ('_' + str(child.get('ClType')) if child.get('ClType')!=None else ''))
def printCFG(nodes):
    # Starts from an empty string and iterates over `nodes`.
    # NOTE(review): the loop body is missing from this excerpt, so what is
    # accumulated in `s` (and whether it is printed or returned) is not visible.
    s = ''
    for node in nodes:
Beispiel #29
def special_unify(fs1, fs2, tree1=None, tree2=None):
    """
    :param fs1: One of the feature structures you want to unify
    :type fs1: FeatStruct
    :param fs2: Another feature structure
    :type fs2: FeatStruct
    :param tree1: The tree that you want to restore inter-node reference
    :type tree1: TAGTree
    :param tree2: Another tree, optional.
    :type tree2: TAGTree
    :return: the unified structure, or None when some path has no value in common
    :rtype: FeatStruct / None

    Unify like the normal unify(), with these differences:

    1. Disjunction is considered, e.g. [x = a/b/c] and [x = b/c/d] yield
    [x = b/c]; [x = a/b/c] and [x = a/b] yield [x = a/b]
    2. The result is a new feature structure, but its leaf nodes are
    references to the leaf nodes of fs1 and fs2, not copies.
    3. When a path has the same value in both structures, a new leaf is
    created and the references in tree1 and tree2 are corrected to point at
    that new leaf.
    """
    new_fs = FeatStruct()
    # Tuples of (new_entry, item_1, item_2) used to repoint tree references.
    correction_list = []

    for path in get_all_path(fs2):
        item_1 = get_element_by_path(fs1, path)
        item_2 = get_element_by_path(fs2, path)
        if item_1 is None:
            # Path only in fs2: attach its leaf by reference (ref == 1).
            add_new_fs(new_fs, path, item_2, 1)
            continue

        tc = test_contain(item_1, item_2)
        if tc is None:
            return None  # Conflict: no alternative in common.
        if tc == 1:  # item_1 is the subset; always keep the smaller one.
            add_new_fs(new_fs, path, item_1, 1)
        elif tc == -1:
            add_new_fs(new_fs, path, item_2, 1)
        elif tc == 0:  # Same entries: share one new leaf.
            corr_check = search_correction(correction_list, item_1)
            if corr_check is None:
                new_entry = copy.deepcopy(item_1)
                # Recorded so that references to item_1 / item_2 in the
                # trees can be redirected to new_entry afterwards.
                # NOTE(review): the append was not visible in the excerpt
                # this was restored from; without it the list stays empty
                # and the corrections below do nothing -- confirm.
                correction_list.append((new_entry, item_1, item_2))
            else:
                new_entry = corr_check
            add_new_fs(new_fs, path, new_entry, 1)
        else:
            # Partial intersection: tc is a brand-new FeatStruct holding
            # only the shared alternatives, so no correction is needed.
            add_new_fs(new_fs, path, tc, 1)

    # Overlapping paths were handled above; what is left is to add the
    # paths that exist in fs1 but not in fs2.
    for path in get_all_path(fs1):
        if get_element_by_path(fs2, path) is None:
            add_new_fs(new_fs, path, get_element_by_path(fs1, path), 1)

    if tree1 is not None:
        correct_other_nodes(correction_list, tree1)
    if tree2 is not None:
        correct_other_nodes(correction_list, tree2)

    return new_fs
Beispiel #30
# NOTE(review): this excerpt is corrupted -- the parameter list is cut off
# after `gapUp,`, and several FeatStruct(...) calls plus the `else:` lines
# between them are incomplete. The flags and the semUp/varUp names used below
# are presumably the missing parameters -- TODO confirm against the upstream
# source.
def featStruct(gapUp,
    gap = Variable('?gap')

    # Pick the verb-phrase structure according to the arrive/depart flags.
    if arriveFlag and not (destVpFlag) and not (sourceVpFlag):
        vp = FeatStruct(arrive=FeatStruct(
            t=FeatStruct(t_var=Variable('?t'), time_var=Variable('?time'))))
    elif departFlag and departVpFlag:
        vp = FeatStruct(depart=FeatStruct(
        vp = FeatStruct(

    # Pick the noun-phrase structure according to the destination/bus flags.
    if destNpFlag and not (busNameNpFlag):
        np = FeatStruct(dest=FeatStruct(
        np = FeatStruct(

    wh = FeatStruct(
        whType=FeatStruct(f=Variable('?f'), type=Variable('?type')))
    sem = FeatStruct(query=FeatStruct(vp=vp, np=np, wh=wh))
    var = Variable('?a')

    # Template built from variables, unified with the caller-supplied values.
    para = FeatStruct(gap=gap, sem=sem, var=var)
    paraUpdate = FeatStruct(gap=gapUp, sem=semUp, var=varUp)
    # print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
    # print(paraUpdate)
    # paraUpdate.unify(para)['sem']['query']['vp']['arrive']['f']
    return paraUpdate.unify(para)
Beispiel #31
def get_trigram_featstruct(trigram_tuple):
    """Return a FeatStruct with features x, y, z taken from items 0, 1, 2."""
    first = trigram_tuple[0]
    second = trigram_tuple[1]
    third = trigram_tuple[2]
    return FeatStruct(x=first, y=second, z=third)
def analyze_template(s):
    """Parse template text into two dictionaries of feature structures.

    s: the template text. Blank lines and lines starting with ';' are
    skipped, and anything after a ';' on a line is ignored. Every remaining
    line must start with '@' or '#' and end with '!'.

    '@name f1, f2, ...!' defines a named feature structure. Each item is
    either '@other' (unify with an earlier definition) or '<path> = value',
    where value may itself be '@other'.

    '#word_pos node:<path> = value, ...!' defines the per-node feature
    structures of one entry.

    Returns (feature_list, feature_list2): the first maps '@' names to their
    feature structures, the second maps '#' identifiers to a FeatStruct
    keyed by node type.

    Raises TypeError for a malformed line and KeyError for a duplicate '#'
    identifier.
    """
    feature_list = {}
    feature_list2 = {}
    for l in s.splitlines():
        l = l.strip()
        if l == '' or l[0] == ';':
            continue
        # Drop a trailing comment.
        index = l.find(';')
        if index != -1:
            l = l[:index]
            l = l.strip()

        if l[0] == '@':
            if l[-1] != '!':
                raise TypeError("Each line should be terminated with a '!'")
            l = l[1:-1]
            # Split off the name only; the rest is the feature list.
            temp = l.split(None, 1)
            name = temp[0]
            l = l[len(name):].strip()
            features = l.split(',')

            fs = FeatStruct()
            for f in features:
                f = f.strip()
                if not f:
                    # Empty item (e.g. a definition without features).
                    continue
                index = f.find('=')
                if f[0] == '@' and f[1:] in feature_list:
                    fs = fs.unify(feature_list[f[1:]])    # unify() does not change in-place
                elif index != -1:
                    lhs = f[:index].strip()
                    rhs = f[index + 1:].strip()
                    if rhs[0] == '@':      # rhs can also be a reference
                        rhs = feature_list[rhs[1:]]
                    if lhs[0] != '<' or lhs[-1] != '>':
                        raise TypeError('The left hand side of a feature structure must be wrapped with <>')
                    lhs = lhs[1:-1]
                    path = lhs.split()
                    fs = fs.unify(make_fs(path, rhs))
            feature_list[name] = fs
        elif l[0] == '#':
            if l[-1] != '!':
                raise TypeError('Invalid input line, must be terminated by "!"')
            l = l[1:-1]
            tokens = l.split(None, 1)     # Split once on whitespace
            word_pos = tokens[0].strip()

            features = tokens[1].split(',')
            new_fs = FeatStruct()
            for fs in features:
                tokens = fs.split(':', 1)
                node_type = tokens[0].strip()
                tokens = tokens[1].split('=', 1)
                lhs = tokens[0].strip()[1:-1]    # Remove <>
                rhs = tokens[1].strip()
                lhs = lhs.split()
                if node_type in new_fs:
                    new_fs[node_type] = new_fs[node_type].unify(make_fs(lhs, rhs))
                else:
                    new_fs[node_type] = make_fs(lhs, rhs)
            if word_pos not in feature_list2:
                feature_list2[word_pos] = new_fs
            else:
                raise KeyError('Duplicate defitinion detected: %s.' % (word_pos))
        else:
            raise TypeError('Cannot recognize line: %s.' % (l))
    return (feature_list, feature_list2)
Beispiel #33
    pred: FeatStruct, arg_0: Union[List[FeatStruct], FeatStruct], arg_1: Union[List[FeatStruct], FeatStruct]
) -> FeatStruct:
    Construct feature structure than represent fact. 
    ex: know(Gael, [Bas, Justine]), return a feature structure of the type :
         arg0 = [head=Gael, tail=na],
         arg1 = [head=Bas, tail=[head=Justine, tail=na]]]
    arg_0 = arg_0 if isinstance(arg_0, list) else [arg_0]
    arg_1 = arg_1 if isinstance(arg_1, list) else [arg_1]
    return FeatStruct(arg0=format_list(arg_0), arg1=format_list(arg_1), pred=pred)

# Predicates used in the sample facts.
know = FeatStruct(sem="know")
know_r = FeatStruct(sem="know_r")
member = FeatStruct(sem="member")

# Entities used in the sample facts.
Gael = FeatStruct(proper="Gael", is_proper=True, explicit=True, is_noun=False, gender="male", form="singular")
Bas = FeatStruct(proper="Bas", is_proper=True, explicit=True, is_noun=False, gender="male", form="singular")
Justine = FeatStruct(proper="Justine", is_proper=True, explicit=True, is_noun=False, gender="female", form="singular")
# The closing parenthesis of this call was missing.
club = FeatStruct(
    proper="na", is_proper=False, is_noun=True, explicit=True, noun="tennis_club", gender="neuter", form="singular"
)

# Dynamic rules that are specific to this input data
# for instance "ProperName[proper=Bas] -> "Bas"
dynamic_productions = [entity_specific_rule(entity) for entity in [Gael, Bas, Justine, club]]
Beispiel #34
def mainLogic(doc):
    """Assemble the semantic feature structures of a question from ``doc``.

    The function probes ``doc`` with the helpers ``subtree_matcher``,
    ``searchChild`` and ``checkHead`` (all defined elsewhere) to set a group
    of flags and names, then builds the ``vp``, ``np``, ``wh`` and ``sem``
    FeatStructs from them and returns ``result``.

    NOTE(review): this copy of the function has lost a number of lines
    (``try:``/``except:``/``else:`` keywords, closing brackets, and parts of
    several ``FeatStruct(...)`` calls), so it is not valid Python as shown.
    Each gap is marked with a NOTE(review) comment below; the missing code
    must be restored from the original source rather than guessed.
    """
    # Flags recording which sentence patterns were recognised.
    departFlag = False
    departVpFlag = False
    arriveFlag = False
    sourceVpFlag = False
    destVpFlag = False
    busNameNpFlag = False
    destNpFlag = False
    # Variable labels and extracted values; '' means "not found yet".
    d = ''
    t = ''
    a = ''
    time = ''
    nameDepart = ''
    nameArrive = ''
    bVar = ''
    h_BusName = ''
    busName = ''
    timeDepart = ''
    # Token texts treated as city names / bus names, and the dependency
    # labels under which a city token is searched for.
    cityTokenText = ['Hồ_Chí_Minh', 'Hà_Nội', 'Huế', 'Đà_nẵng', 'Đà_Nẵng']
    busTokenText = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8']
    cityTokenDep = ['compound', 'nmod', 'obl']

    # A 'det' token with text 'nào' selects the WHICH1 question type;
    # anything else is treated as HOWLONG1.
    (f, typeWh) = ('f2', 'WHICH1') if (
        subtree_matcher(doc, 'det', text='nào') != []) else ('h1', 'HOWLONG1')

    if (typeWh == 'WHICH1'):
        gap = f
        # NOTE(review): an `else:` line appears to be missing here; as shown,
        # `gap` is unconditionally overwritten with 'r2'.
        #Runtime (HOWLONG1 case)
        gap = 'r2'

    if (gap == 'f2'):
        if subtree_matcher(doc, 'case', text='đến') != [] or subtree_matcher(
                doc, 'ccomp', text='đến') != []:
            arriveFlag = True
            a = 'a3'
            # time=subtree_matcher(doc,'nummod')[0] if (len(subtree_matcher(doc,'nummod'))==1) else subtree_matcher(doc,'nummod')
            # print([i.text for i in searchChild(doc,'ROOT')])
            # NOTE(review): the lines around this list comprehension (likely a
            # `try:`, the closing `]`, and an `except` clause) are missing.
            # The visible part collects ROOT children whose text contains 'HR'.
                time = [
                    i.text for i in searchChild(doc, 'ROOT') if 'HR' in i.text
                time = ''
            # NOTE(review): an `else:` appears to be missing between the two
            # assignments to `t` below.
            if time != '':
                t = 't2'
                t = '?t'
            if subtree_matcher(doc, 'ROOT', text='đi') != []:
                departFlag = True
                d = 'd3'
                # Try every (city text, dependency label) combination.
                for cT in cityTokenText:
                    for cD in cityTokenDep:
                        temp = subtree_matcher(doc, cD, cT)
                        tempHead = checkHead(doc, temp)
                        # NOTE(review): `try:` / `except:` lines appear to be
                        # missing around the two `tempChild` assignments.
                            tempChild = [i.text for i in searchChild(doc, cD)]
                            tempChild = ''

                        # Classify the matched city by its head and by the
                        # texts found in `tempChild` ('từ' vs 'đến').
                        if (temp != []) and (tempHead != 'đi'):
                            destVpFlag = True
                            nameArrive = temp
                        elif (temp != []) and (tempHead
                                               == 'đi') and ('từ'
                                                             in tempChild):
                            sourceVpFlag = True
                            nameDepart = temp
                        elif (temp != []) and (tempHead
                                               == 'đi') and ('đến'
                                                             in tempChild):
                            destVpFlag = True
                            nameArrive = temp
            # elif subtree_matcher(doc,'ROOT',text='xuất_phát')!=[]:
            #     departFlag=True
            #     d='d3'
            #     for cT in cityTokenText:
            #         for cD in cityTokenDep:
            #             temp=subtree_matcher(doc,cD,cT)
            #             tempHead=checkHead(doc,temp)
            #             # try:
            #             #     tempChild=[i.text for i in searchChild(doc,cD)]
            #             # except:
            #             #     tempChild=''
            #             if (temp !=[]) and (tempHead!='xuất_phát'):
            #                 destVpFlag=True
            #                 nameDepart= temp
            #             # elif (temp !=[]) and (tempHead=='xuất_phát') and ('từ' in tempChild):
            #             #     sourceVpFlag=True
            #             #     nameDepart= temp
            #             # elif (temp !=[]) and (tempHead=='xuất_phát') and ('đến' in tempChild):
            #             #     destVpFlag=True
            #             #     nameArrive= temp
                # NOTE(review): the indentation suggests this loop belonged to
                # an `else:` branch whose header line is missing.
                for cT in cityTokenText:
                    for cD in cityTokenDep:
                        temp = subtree_matcher(doc, cD, cT)
                        if temp != []:
                            destNpFlag = True
                            nameArrive = temp
                        # NOTE(review): the two comments below describe loop
                        # control statements that are not present in this copy.
                        # Continue if the inner loop wasn't broken.
                        # Inner loop was broken, break the outer.
        elif subtree_matcher(doc, 'ROOT', text='xuất_phát') != []:
            departFlag = True
            d = 'd3'
            for cT in cityTokenText:
                for cD in cityTokenDep:
                    temp = subtree_matcher(doc, cD, cT)
                    tempHead = checkHead(doc, temp)
                    # try:
                    #     tempChild=[i.text for i in searchChild(doc,cD)]
                    # except:
                    #     tempChild=''
                    # A matched city whose head is not the root verb is
                    # recorded as the departure name.
                    if (temp != []) and (tempHead != 'xuất_phát'):
                        departVpFlag = True
                        nameDepart = temp
                    # elif (temp !=[]) and (tempHead=='xuất_phát') and ('từ' in tempChild):
                    #     sourceVpFlag=True
                    #     nameDepart= temp
                    # elif (temp !=[]) and (tempHead=='xuất_phát') and ('đến' in tempChild):
                    #     destVpFlag=True
                    #     nameArrive= temp
        # print(destNpFlag)
        # Variable('?hDest'),name=Variable('?nameDest')
    elif (gap == 'r2'):
        if (subtree_matcher(doc, 'ROOT', text='đến')) != []:
            arriveFlag = True
            a = 'a3'

            # In this branch the time is left as an open variable.
            time = '?time'
            t = '?t'

            # Take the single 'obj' match directly, otherwise keep the whole
            # result and look for a known city name in it.
            nameArrive = subtree_matcher(doc, 'obj')[0] if (len(
                subtree_matcher(doc, 'obj')) == 1) else subtree_matcher(
                    doc, 'obj')
            if type(nameArrive) == list:
                for obj in nameArrive:
                    if obj in cityTokenText:
                        nameArrive = obj

            # NOTE(review): an `else:` appears to be missing before the second
            # assignment to `h`; as shown, `h` always ends up as '?h'.
            if nameArrive != '':
                h = 'h4'
                destVpFlag = True
                h = '?h'

            if subtree_matcher(doc, 'case', text='từ') != []:
                d = 'd3'
                nameDepart = checkHead(doc, 'từ')
                # NOTE(review): the body of the `if` below is missing.
                if (nameDepart != ''):
            listObj = subtree_matcher(doc, 'obj')
            listCompound = subtree_matcher(doc, 'compound')

            # Look for a bus name among the 'obj' matches first, then fall
            # back to the 'compound' matches.
            for sub in listObj:
                if sub in busTokenText:
                    busName = sub

            if busName == '':
                for sub in listCompound:
                    if sub in busTokenText:
                        busName = sub

            if busName != '':
                busNameNpFlag = True
                bVar = 'f2'
                h_BusName = 'h3'

    # Build the verb-phrase structure.
    # NOTE(review): most of the FeatStruct(...) arguments of the later
    # branches, and the `elif`/`else` headers between them, are missing.
    if arriveFlag and not (destVpFlag) and not (sourceVpFlag):
        vp = FeatStruct(
            arrive=FeatStruct(a=a, f=f, t=FeatStruct(t_var=t, time_var=time)))
    elif departFlag and departVpFlag:
        vp = FeatStruct(
                              t=FeatStruct(t_var=t, time_var=time)),
        vp = FeatStruct(
                              t=FeatStruct(t_var=t, time_var=time)),
                              t=FeatStruct(t_var=t, time_var=time)),
                f=Variable('?f'), name=FeatStruct(h='h6', name=nameArrive))))

    # Build the noun-phrase structure.
    # NOTE(review): part of the `dest` FeatStruct and an `else:` line appear
    # to be missing here.
    if destNpFlag and not (busNameNpFlag):
        np = FeatStruct(dest=FeatStruct(
                            name=FeatStruct(h='h3', name=nameArrive))))
        np = FeatStruct(the=FeatStruct(
            bus=bVar, busname=FeatStruct(h=h_BusName, name=busName)))

    # Wh-part and the overall query structure combining vp, np and wh.
    wh = FeatStruct(whType=FeatStruct(f=f, type=typeWh))
    sem = FeatStruct(query=FeatStruct(vp=vp, np=np, wh=wh))
    var = Variable('?a')

    # NOTE(review): the remaining arguments of this call are missing.
    result = featStruct(gap,
    return result
Beispiel #35
    def _applyConstraints(self, parent, child):
        """Constraint step: unify the semantic heads of ``parent`` and ``child``.

        Given a satisfied state ``child`` and the updated state ``parent``,
        compositional constraints are applied in the form of unification
        between the semantic heads of the two states.
        A successful unification implies the two states are compatible given
        the compositional constraints, in which case the semantic bodies are
        merged and updated to reflect the unification.
        Lastly, the string of the ``parent`` is updated.

        Because unification is a tricky process, care needs to be taken to
        ensure that it is properly handled.
        Variables that have the same name are assumed to be the same, but
        because the grammar generates feature structures for compositional
        constraints using a small set of variable names, often variables can
        have the same name despite their being independent entities.
        One workaround is to rename the variables in one of the semantic heads
        before unification.
        It is important that the renamed variables are updated in the semantic
        bodies as well.

        Returns a two-element list ``[ok, parent]``. ``ok`` is False when the
        parent's features under the ``h<i>`` key are not a subset of the
        child's, or when unification fails; otherwise it is True. ``parent``
        is the object that was passed in; on success its ``head``, ``body``
        and ``string`` attributes have been reassigned.

        NOTE(review): this copy of the method has lost several lines (the
        docstring terminator, the argument of ``LWFG.is_terminal``, several
        branch/loop bodies and ``else:`` lines). Each gap is marked with a
        NOTE(review) comment below and must be restored from the original.
        """
        # NOTE(review): the argument list and closing `):` of this call are
        # missing in this copy.
        if LWFG.is_terminal(	# child is a special lexical state, and thus contains no semantic info
            return [True, parent]		# skip

        # Unification of Semantic Heads

        # The compositional constraints are represented in such a way that there is a separate set of
        # constraints for each term in the production rule.
        # Each set of constraints is a separate feature structure.
        # The set of constraints for the LHS is indexed with a feature identifier: `h'
        # The set of constraints for the ith term in the RHS is indexed with a feature identifier: `hi'
        # Thus, when unification is performed, the constraints of the LHS of the child state (indexed by `h')
        # need to be retrieved.
        # Also, because NLTK unifies embedded feature structures only when their feature identifiers are the
        # same, we need to have the feature id of the retrieved set of constraints match the corresponding
        # feature id in the parent state.
        # Again, that would be `hi' where $i$ is the index of the child's LHS in the parent's RHS.
        hidx = 'h'+str(parent.dotIdx)
        childHead = FS()  # make a new Feature Structure to get around the copy-by-ref issues
        childHead[hidx] = child.head['h']
        parentHead = parent.head
        # Rename variables in childHead to avoid confusion
        # step 0: check if child.body and parent.body share variable names, change any that are shared
        renamedVarMap1 = {}
        usedVars = []
        if parent.body:
            pBodVars = parent.body.variables()
            cBodVars = child.body.variables()
            for v in cBodVars:
                if v in pBodVars: # change v
                    # Keep generating names until one is unused in both bodies.
                    nv = self._newVarName(v)
                    while nv in pBodVars+cBodVars:
                        nv = self._newVarName(nv)
                    renamedVarMap1[Variable(v)] = Variable(nv)
                # NOTE(review): the body of this `else` and of the two `for`
                # loops below is missing in this copy (the second loop was
                # probably under a separate `else:` -- confirm).
                else: # not shared by parent
            for v in pBodVars:
            for v in child.body.variables():

        # step 1: find used variables from parent's semantic head  
        usedVars += list(parentHead.variables())

        # step 2: rename variables in child's semantic head
        # renamedVarMap2 is handed to rename_variables as `new_vars`; the code
        # further down reads the old-name -> new-name pairs back from it.
        renamedVarMap2 = {}
        childHead = childHead.rename_variables(used_vars=usedVars, new_vars=renamedVarMap2)

        # check if features align with each other
        childFeats = set(childHead[hidx].keys())
        parentFeats = set(parentHead[hidx].keys())
        if not (parentFeats <= childFeats): # True if the relevant set of features of the parent state
					    # are **not** a subset of those of the child state.
            return [False, parent]	    # If True, the states are incompatible.
        # perform unification
        bindings = {}
        parentHead = parentHead.unify(childHead, bindings)
        if not parentHead: # failed to unify
            return [False, parent] # the states are incompatible

        # update pState's rule
        parent.head = parentHead

        # Updating Semantic Bodies
        childBody = LWFG.OntoSeR(str(child.body)) # create new sem body to get around copy-by-ref issues

        # The next part is a bit confusing and needs spelling out.
        # We want to change variables names in the child's semantic body that are shared with
        # the parent's semantic body.
        # However, we can't just do it willy-nilly, we need to ensure that only those variable names
        # that are not linked with the parent's body are changed.
        # For example, in "the smart girl" -- when "n -> det n" is completed, the body corresponding to "the"
        # is linked with the body corresponding to "smart girl".
        # If we change variables names without consideration, that link is lost.
        # To maintain the link, a roundabout method is employed.
        # We first look at all renamed var names in "renamedVarMap2" -- which results from changing vars in
        # the child's semantic head.
        # We then look at all renamed names and see whether they were involved in unification with the parent's
        # head.
        # We can do that by going through the unification "bindings" and checking to see if the renamed names
        # are keys.
        # If they were involved in unification, we check to see if they were bound to a variable in the parent's
        # head that we were about to rename.
        # We can do that by looking to see if the bound variable is a key in "renamedVarMap1" -- which we got
        # from renaming variable names in the child's body that were present in the parent's body.

        for var in renamedVarMap2.keys():
            # NOTE(review): the body of this `if` is missing in this copy.
            if renamedVarMap2[var] in bindings.keys() and bindings[renamedVarMap2[var]] in renamedVarMap1.keys():


        # Merge the bodies: use the child's body when the parent has none,
        # otherwise join the two with a comma.
        # NOTE(review): an `else:` line appears to be missing between the two
        # `parentBody` assignments.
        if not parent.body:
##            print parent, child
##            print parent.body, child.body
##            print renamedVarMap1
##            print renamedVarMap2
##            print bindings
##            print " "
            parentBody = childBody
            parentBody = LWFG.OntoSeR(str(parent.body) + ',' + str(childBody))
        parent.body = parentBody

        # Updating Strings
        # NOTE(review): an `else:` line appears to be missing between the two
        # `parentString` assignments.
        if not parent.string:
            parentString = child.string
            parentString = parent.string + ' ' + child.string
        parent.string = parentString
        return [True, parent]