def convert_to_nltk_tree(tree):
    """Recursively convert a nested sequence into a ``Tree``.

    A string is treated as a leaf and handed back unchanged. Any other
    value is indexed as a sequence: element 0 becomes the node label and
    every following element is converted recursively and attached as a
    child, in order.
    """
    if isinstance(tree, str):
        return tree
    label, children = tree[0], tree[1:]
    return Tree(label, [convert_to_nltk_tree(child) for child in children])
Ejemplo n.º 2
0
 def _remove_tag(self, tree):
     if isinstance(tree, str):
         return re.sub('[0-9]+', 'n', tree.lower())
     if len(tree) == 1:
         return self._remove_tag(tree[0])
     new_t = Tree(tree.label(), [])
     for subtree in tree:
         if isinstance(subtree, Tree):
             subtree = self._remove_tag(subtree)
             new_t.insert(len(new_t), subtree)
     return new_t
Ejemplo n.º 3
0
 def _remove_puntc(self, tree):
     if len(tree) == 1:
         if isinstance(tree[0], str):
             if self.lang == 'il':
                 if tree.label() in string.punctuation or '-' not in tree[
                         0] or '*' in tree[0]:
                     return None
                 else:
                     tree[0] = tree[0].split('-')[0]
             elif word_tags.get(self.lang) is not None and tree.label(
             ) not in word_tags[self.lang]:
                 return None
         else:
             return self._remove_puntc(tree[0])
     new_t = Tree(tree.label(), [])
     for subtree in tree:
         if isinstance(subtree, Tree):
             subtree = self._remove_puntc(subtree)
         if subtree is not None and len(subtree) > 0:
             new_t.insert(len(new_t), subtree)
     return new_t
def modify_tree_structure(parent_tree):
    """Build a reordered ``ROOT`` tree from ``parent_tree``.

    The tree is walked twice:

    1. Every ``NP`` subtree is passed to ``handle_noun_clause`` and every
       ``VP`` / ``PRP`` subtree to ``handle_verb_prop_clause``. Both
       helpers receive and return the running insertion index and the
       tree under construction.
    2. Every subtree spanning exactly one leaf that is still unflagged,
       and whose parent is also unflagged, is flagged and inserted at the
       running index.

    Args:
        parent_tree: A tree whose subtrees expose ``label()``,
            ``leaves()``, ``treeposition()`` and ``parent()``
            (presumably an NLTK ``ParentedTree`` -- confirm with callers).

    Returns:
        The newly built ``Tree`` labelled ``'ROOT'``.
    """
    # Flags keyed by tree position; per the original comment they all start
    # at 0, and 1 is written below once a subtree has been placed.
    tree_traversal_flag = label_parse_subtrees(parent_tree)
    modified_parse_tree = Tree('ROOT', [])
    i = 0
    for sub_tree in parent_tree.subtrees():
        if sub_tree.label() == "NP":
            i, modified_parse_tree = handle_noun_clause(
                i, tree_traversal_flag, modified_parse_tree, sub_tree)
        if sub_tree.label() in ("VP", "PRP"):
            i, modified_parse_tree = handle_verb_prop_clause(
                i, tree_traversal_flag, modified_parse_tree, sub_tree)

    # Pick up the single-word subtrees that were not placed above.
    # One pass over all subtrees is enough: flags only ever change from 0
    # to 1, so a subtree skipped here can never become eligible later, and
    # re-walking the subtrees of every subtree would only repeat no-ops.
    for node in parent_tree.subtrees():
        if len(node.leaves()) != 1:
            continue
        position = node.treeposition()
        if tree_traversal_flag[position] != 0:
            continue
        # The root has no parent; treat that as "parent not placed" rather
        # than failing on None.treeposition().
        parent = node.parent()
        if parent is not None and tree_traversal_flag[parent.treeposition()] != 0:
            continue
        tree_traversal_flag[position] = 1
        modified_parse_tree.insert(i, node)
        i += 1

    return modified_parse_tree
Ejemplo n.º 5
0
def _isl_parent_placed(node, placed):
    """Return True if ``node`` has a parent that is already marked in ``placed``."""
    parent = node.parent()
    # The root has no parent, which counts as "not placed".
    return parent is not None and placed[parent.treeposition()] != 0


def _isl_reorder(parenttree):
    """Return a ``ROOT`` tree holding ``parenttree``'s subtrees in output order.

    Noun phrases come first, then noun phrases / pronouns found under verb
    phrases, then every remaining single-word subtree. A subtree is only
    placed if neither it nor its parent has been placed before.
    """
    placed = {sub.treeposition(): 0 for sub in parenttree.subtrees()}
    isl_tree = Tree('ROOT', [])

    def eligible(node):
        return (placed[node.treeposition()] == 0
                and not _isl_parent_placed(node, placed))

    def place(node):
        placed[node.treeposition()] = 1
        isl_tree.append(node)

    for sub in parenttree.subtrees():
        if sub.label() == "NP" and eligible(sub):
            place(sub)
        if sub.label() in ("VP", "PRP"):
            for inner in sub.subtrees():
                if inner.label() in ("NP", "PRP") and eligible(inner):
                    place(inner)

    # Remaining single-word subtrees. A single pass suffices because marks
    # only ever go from 0 to 1, so nothing skipped here becomes eligible
    # on a later visit.
    for sub in parenttree.subtrees():
        if len(sub.leaves()) == 1 and eligible(sub):
            place(sub)

    return isl_tree


def convert_sentence(input_string: str):
    """Reorder an English sentence and hand the result to ``create_video``.

    The sentence is split on whitespace and parsed through a CoreNLP server
    at ``http://localhost:9000``. Its phrases are reordered, each word is
    lemmatized, English stop words are dropped, and the remaining words
    (each followed by a single space) are passed to ``create_video``.

    Args:
        input_string: The sentence to convert.

    Returns:
        ``None`` if ``input_string`` contains no words; otherwise whatever
        ``create_video`` returns. A one-word input is passed to
        ``create_video`` unchanged, without parsing.
    """
    # NOTE(review): this stores the java executable path in CLASSPATH on
    # every call; kept as-is, but confirm anything actually reads it, since
    # the parser below talks to a server over HTTP.
    java_path = '/usr/bin/java'
    os.environ['CLASSPATH'] = java_path

    words = input_string.split()
    if not words:
        # Nothing to parse (empty or whitespace-only input).
        return None
    if len(words) == 1:
        return create_video(input_string)

    parser = CoreNLPParser(url='http://localhost:9000')
    parses = list(parser.parse(words))
    # Round-trip through the bracketed string form to obtain a tree whose
    # nodes know their parent and position.
    parenttree = ParentedTree.fromstring(str(parses[0]))

    print("\n\n")

    isl_tree = _isl_reorder(parenttree)

    stop_words = set(stopwords.words("english"))
    lemmatizer = WordNetLemmatizer()
    lemmas = (lemmatizer.lemmatize(word) for word in isl_tree.leaves())
    # Every kept word is followed by one space, including the last one.
    isl_sentence = "".join(
        lemma + " " for lemma in lemmas if lemma not in stop_words)

    return create_video(isl_sentence)