# Build a one-node word-structure tree for every single-character word and
# register it: the tree is appended to NewForest and Word2treeID maps the word
# to the position that tree occupies in NewForest.
# NOTE(review): these statements were recovered from a single collapsed line;
# the nesting below is the reading implied by the syntax -- confirm against
# the original layout (including whether the first print was really disabled).
# print('\n\n\n>>>Generating word structure trees for single char character...')
for single_char_word in SingleCharWord:
    if single_char_word not in UpdatedVec:
        # No entry in UpdatedVec for this word: report it and stop the loop.
        tag_set = Word2Tag[single_char_word]
        print('Fail!')
        break
    tag_set = UpdatedVec[single_char_word]

    tag_str = set2str(tag_set)
    # Single '<tags>_u' node directly over the word. An Oct. 4 variant that
    # inserted an extra '<tags>_b' unary node between the '_u' node and the
    # word was reverted on Oct. 5 (the extra unary rule is discarded again).
    tree_str = ''.join(['( ', tag_str, '_u ', single_char_word, ' )'])
    tree = ParentedTree(tree_str)

    # Record the slot the tree is about to take, then store the tree.
    index = len(NewForest)
    NewForest.append(tree)
    Word2treeID[single_char_word] = index

print('done! Such trees have been appended to NewForest, and word2treeId mapping has been stored in Word2treeID hashtable.')
# NOTE(review): the line below is several statements collapsed onto one
# physical line, and it is only a fragment -- `tree` and `Vec` are bound
# outside the visible text and the last loop body appears to continue past it.
# The original nesting cannot be recovered from here, so the code is left
# untouched. What the visible statements do, in order:
#   * re-parse `tree` from its pprint() text into a fresh ParentedTree
#     (`new_tree`);
#   * for each subtree of `new_tree`, join its leaves into `string`; when
#     `string` is in `Vec`, split the current node label with decompose_tag()
#     into (tag, subscript) and relabel the node as
#     set2str(Vec[string]) + '_' + subscript;
#   * append `new_tree` to NewForest (presumably once, after the relabel loop
#     -- TODO confirm);
#   * iterate again over the subtrees that have more than one child and whose
#     joined leaves are in `Vec`, taking children 0 and 1 as
#     left_child / right_child.
new_tree=ParentedTree(tree.pprint()) for subtree in new_tree.subtrees(): #update current tree string=''.join(subtree.leaves()) if string in Vec: #leaves/string in the record tag, subscript= decompose_tag(subtree.node) tag_vec_str=set2str(Vec[string]) #get the tag-set of the node according to the leaves and convert it to str subtree.node=tag_vec_str+'_'+subscript #update the node with the new_tag NewForest.append(new_tree) for subtree in new_tree.subtrees(lambda x: len(x)>1 and ''.join(x.leaves()) in Vec ): # extraction known production rules string=''.join(subtree.leaves()) left_child=subtree[0] right_child=subtree[1]