#

# Build one flat word-structure tree per single-character word, append it to
# NewForest, and remember its position in Word2treeID.
print('\n\n\n>>>Generating word structure trees for single char character...')

for single_char_word in SingleCharWord:

  if single_char_word not in UpdatedVec:
    # No updated tag set for this word: fall back to Word2Tag, report the
    # failure and stop processing the remaining words.
    # NOTE(review): the value fetched here is never used because of the
    # break, and the final 'done!' message is still printed -- confirm
    # whether that is intended.
    tag_set=Word2Tag[single_char_word]
    print('Fail!')
    break

  tag_set=UpdatedVec[single_char_word]
  tag_str=set2str(tag_set)

  # Bracketed form "( <tags>_u <word> )".  A variant tried on Oct. 4 wrapped
  # the word in an extra "<tags>_b" unary layer; that was reverted on Oct. 5.
  tree_str=''.join(['( ', tag_str, '_u ', single_char_word, ' )'])
  tree=ParentedTree(tree_str)

  # The tree's ID is the forest length just before the tree is appended.
  index=len(NewForest)
  NewForest.append(tree)
  Word2treeID[single_char_word]=index

print('done! Such trees have been appended to NewForest, and word2treeId mapping has been stored in Word2treeID hashtable.')


# Example #2
0

  # Build new_tree from the printed form of `tree` (`tree` is defined outside
  # this excerpt); the relabelling below is applied to new_tree.
  new_tree=ParentedTree(tree.pprint())


  

  # Visit every subtree and relabel those whose concatenated leaves are a key of Vec.
  for subtree in new_tree.subtrees():  #update current tree

    # Join the subtree's leaves into a single string to use as the lookup key.
    string=''.join(subtree.leaves())

    if  string in Vec:  #leaves/string in the record

      # Split the current label; only `subscript` is reused below
      # (`tag` is not read again in the visible code).
      tag, subscript= decompose_tag(subtree.node)

      tag_vec_str=set2str(Vec[string]) #get the tag-set of the node according to the leaves and convert it to str

      # New label = "<tag-set string>_<old subscript>".
      subtree.node=tag_vec_str+'_'+subscript  #update the node with the new_tag


  # Store the relabelled tree in the forest.
  NewForest.append(new_tree)
  

  for subtree in new_tree.subtrees(lambda x: len(x)>1 and ''.join(x.leaves()) in Vec ):  # extraction known production rules

    string=''.join(subtree.leaves())


    left_child=subtree[0]
    right_child=subtree[1]