def receiverIsSpecified(tree):
    """Return whether the parsed command names a receiver.

    A receiver counts as specified when either

    * ``tree`` has a direct child subtree labelled ``'Keyword'`` and some
      ``'Keyword'`` subtree anywhere in ``tree`` starts with the leaf
      ``'to'`` or ``'for'``; or
    * at least one ``'TEXT2'`` subtree exists anywhere in ``tree``.
    """
    top_level_labels = [
        node.label() for node in tree if isinstance(node, nltk.Tree)
    ]

    # First leaf of every Keyword / TEXT2 subtree, gathered in one traversal.
    keyword_heads = []
    text2_heads = []
    for sub in tree.subtrees():
        tag = sub.label()
        if tag == 'Keyword':
            keyword_heads.append(sub.leaves()[0])
        elif tag == 'TEXT2':
            text2_heads.append(sub.leaves()[0])

    if text2_heads:
        return True
    if 'Keyword' not in top_level_labels:
        return False
    return 'to' in keyword_heads or 'for' in keyword_heads
def PrintResult2(tree):
    """Render the parsed command ``tree`` as a human-readable sentence.

    Subtrees are visited in ``tree.subtrees()`` order and contribute text
    according to their label: the command verb, the message marker and body,
    a caption for each recognised ``Keyword`` and the leaves of ``TEXT1``
    nodes. The result is returned and also left in the module-level global
    ``ctree``.
    """
    # drawing(tree)
    global ctree

    # Caption emitted for a Keyword subtree, keyed by its first leaf.
    keyword_captions = {
        'an': '\n Intent ',
        'to': '\n The receiver is',
        'at': '\n Time ',
        'repeat': '\n Frequency ',
        'say': '\n with the additional command :',
    }

    ctree = ''
    for node in tree.subtrees():
        tag = node.label()
        if tag == 'CommandVerb':
            ctree += "You want to "
            ctree += str(node.leaves())
        elif tag == 'SMSInitial':
            ctree += " Message"
        elif tag == 'SMS' or tag == 'TEXT1':
            ctree += str(node.leaves())
        elif tag == 'Keyword':
            caption = keyword_captions.get(node.leaves()[0])
            if caption is not None:
                ctree += caption
    return ctree
def process_expression(self, expr_tree):
    """Evaluate an expression subtree and return its value.

    The subtrees of ``expr_tree`` are visited in ``subtrees()`` order:

    * For an ``expression_t`` node, its own subtrees are scanned until an
      operator node (``add_expr``/``mul_expr``/``sub_expr``/``div_expr``) is
      met. If an ``id`` or ``num`` node without a right sibling is seen
      first, the expression is a single operand: a digit label is returned
      as ``int``, any other label is resolved with ``self.lookup``.
    * For an operator node, its string form is flattened (spaces, newlines
      and ``)`` removed, ``(`` turned into a space) and handed to
      ``self.convertsymbols``, whose result is returned.

    Returns ``None`` when neither case is encountered.
    """
    operator_labels = ('add_expr', 'mul_expr', 'sub_expr', 'div_expr')
    operand_labels = ('id', 'num')

    for tree in expr_tree.subtrees():
        label = tree.label()
        if label == "expression_t":
            for t in tree.subtrees():
                if t.label() in operator_labels:
                    # Compound expression: handled when the outer loop
                    # reaches the operator node itself.
                    break
                if t.label() in operand_labels and t.right_sibling() is None:
                    for l in t.subtrees():
                        name = l.label()
                        if name in operand_labels:
                            continue  # wrapper node, not the value itself
                        if name.isdigit():
                            return int(name)
                        return self.lookup(name)
        elif label in operator_labels:
            # Stringify the operator node being visited. The previous code
            # used the inner-loop variable `t`, which is unbound when the
            # root is itself an operator and may refer to a different node
            # after a closed sibling expression has been scanned.
            flattened = (
                str(tree)
                .replace(' ', '')
                .replace(')', '')
                .replace('(', ' ')
                .replace('\n', '')
            )
            return self.convertsymbols(flattened)
    return None
def breadth_first_search(tree):
    """Traverse ``tree`` breadth-first and record the path to every subtree.

    :param tree: root tree to traverse; must provide ``subtrees()`` and
        list-style access to its children.
    :return: a 3-tuple ``(tree_node_lst, meta_lst, meta)``

        tree_node_lst: the subtrees of ``tree`` in breadth-first order
            (children that are not ``nltk.Tree`` instances are not visited)
        meta_lst: for each entry of ``tree_node_lst``, the list of child
            indices leading from the root to that subtree (``[]`` for the
            root)
        meta: dict mapping a subtree's position in
            ``list(tree.subtrees())`` to that same child-index path
    """
    list_subtree = list(tree.subtrees())

    # Look subtrees up by identity. ``list.index`` compares by equality, so
    # two structurally identical subtrees used to share one slot (and one
    # path); it also cost O(n) per lookup.
    index_of = {}
    for position, subtree in enumerate(list_subtree):
        index_of.setdefault(id(subtree), position)

    meta = {index_of[id(tree)]: []}
    meta_lst = []
    tree_node_lst = []
    queue_tree = queue.Queue()
    queue_tree.put(tree)
    while not queue_tree.empty():
        node = queue_tree.get()
        if len(node) <= 0:
            warnings.warn("[bft]: len(node) <= 0!! will cause error later")
        path = meta[index_of[id(node)]]
        tree_node_lst.append(node)
        meta_lst.append(path)
        for i, child in enumerate(node):
            if isinstance(child, nltk.Tree):
                # New list per child, so sibling paths never alias.
                meta[index_of[id(child)]] = path + [i]
                queue_tree.put(child)
    return tree_node_lst, meta_lst, meta
def PrintResult2(tree):
    """Render the parsed command ``tree`` as a slot-by-slot summary string.

    Subtrees are visited in ``tree.subtrees()`` order and contribute text
    according to their label: the command verb, the message marker and body,
    a caption for each recognised ``Keyword`` and the leaves of ``TEXT1``
    nodes. The result is returned and also left in the module-level global
    ``ctree``.
    """
    # drawing(tree)
    global ctree

    # Caption emitted for a Keyword subtree, keyed by its first leaf.
    keyword_captions = {
        'an': '\n Intent',
        'to': '\n Receiver',
        'at': '\n Time',
        'repeat': '\n Frequency',
        'say': '\n additional say :',
    }

    ctree = ''
    for node in tree.subtrees():
        tag = node.label()
        if tag == 'CommandVerb':
            ctree += 'CommandVerb = '
            ctree += str(node.leaves())
        elif tag == 'SMSInitial':
            ctree += '\n Message'
        elif tag == 'SMS' or tag == 'TEXT1':
            ctree += str(node.leaves())
        elif tag == 'Keyword':
            caption = keyword_captions.get(node.leaves()[0])
            if caption is not None:
                ctree += caption
    return ctree
def process_if_ternary_conditional(self, tree):
    """Run the branch of an if/else or ternary command chosen by its condition.

    The first subtree of ``tree`` carrying a boolean-expression label is
    evaluated with ``self.process_boolean``. When it compares equal to
    ``True`` and has a right sibling, that sibling is executed; when it
    compares equal to ``False``, the sibling after that one is executed.
    In either case the method returns right after running the branch.
    Otherwise the search continues with the next subtree.
    """
    bool_symbols = (
        ("boolean_exp_equal", '=='),
        ("boolean_exp_val_true", 'true'),
        ("boolean_exp_not", 'not'),
        ("boolean_exp_not_equal", '!='),
        ("boolean_exp_lessthan", '<'),
        ("boolean_exp_greaterthan", '>'),
        ("boolean_exp_greaterthan_equal", '>='),
        ("boolean_exp_lesserthan_equal", '<='),
        ("boolean_exp_or", 'or'),
        ("boolean_exp_and", 'and'),
        ("boolean_exp_val_false", 'false'),
    )
    boollist = [name for name, _ in bool_symbols]
    booldict = dict(bool_symbols)

    for node in tree.subtrees():
        if str(node.label()) not in boollist:
            continue
        outcome = self.process_boolean(node, boollist, booldict)
        # Equality (not truthiness) is deliberate: process_boolean may hand
        # back None or a non-boolean operand, which selects neither branch.
        if node.right_sibling() is not None and outcome == True:
            self.process_command(node.right_sibling())
            return
        if outcome == False:
            self.process_command(node.right_sibling().right_sibling())
            return
def PrintResult2(tree):
    """Print the slots recognised in the parsed command ``tree``.

    Each subtree is inspected in ``tree.subtrees()`` order. Command verb,
    message body and free-text nodes are printed with their leaves; a
    ``Keyword`` node prints a caption chosen by its first leaf (with its
    leaves for ``'se'`` and ``'to'``); a ``KeywordContent`` node whose first
    leaf is ``'content'`` prints the message leaves.
    """
    # Keyword captions printed together with the node's leaves.
    captions_with_leaves = {'se': "Intent =", 'to': "Receiver"}
    # Keyword captions printed on their own.
    captions_alone = {
        'at': "Time",
        'repeat': "Frequency",
        'say': "additional say :",
    }

    for node in tree.subtrees():
        tag = node.label()
        if tag == 'CommandVerb':
            print("CommandVerb = ", node.leaves())
        elif tag == 'SMSInitial':
            print("Message")
        elif tag in ('SMS', 'TEXT1'):
            print("", node.leaves())
        elif tag == 'Keyword':
            head = node.leaves()[0]
            if head in captions_with_leaves:
                print(captions_with_leaves[head], node.leaves())
            elif head in captions_alone:
                print(captions_alone[head])
        elif tag == 'KeywordContent':
            if node.leaves()[0] == 'content':
                print("Message = ", node.leaves())
def PrintResult(tree):
    """Print the slots recognised in the parsed command ``tree``.

    Subtrees are visited in ``tree.subtrees()`` order. Slot nodes are printed
    as a caption followed by their leaves, ``SMSInitial`` prints a fixed
    marker, and a ``Keyword`` node prints a caption chosen by its first leaf.
    Subtrees with any other label are ignored.
    """
    # Caption printed for a Keyword subtree, keyed by its first leaf.
    keyword_captions = {
        'an': "Intent",
        'to': "Receiver",
        'at': "Time",
        'repeat': "Frequency",
        'say': "additional say :",
    }
    # Caption printed before the leaves of each slot label.
    slot_captions = {
        'CommandVerb': "CommandVerb = ",
        'SMS': "",
        'TEXT1': "",
        'Contact': "Receiver = ",
        'Intent': "Intent =",
        'Content': "Content = ",
        'Time': "Time = ",
        'AdditionalSlot': "Additional = ",
        'FrequencySlot': "Frequency = ",
    }

    for node in tree.subtrees():
        tag = node.label()
        if tag == 'SMSInitial':
            print("Message")
        elif tag == 'Keyword':
            caption = keyword_captions.get(node.leaves()[0])
            if caption is not None:
                print(caption)
        elif tag in slot_captions:
            print(slot_captions[tag], node.leaves())
def _get_parsed_sent(self, grid, pos_in_tree):
    """Build the parse tree of one sentence from its column grid.

    The word, POS and bracketing columns are combined into a bracketed
    string, with literal parentheses in words and tags escaped as
    ``-LRB-``/``-RRB-``, and parsed with ``self._tree_class``. If that
    fails with ``ValueError`` or ``IndexError`` the string is wrapped in
    ``self._top_node`` and parsed again.

    When ``pos_in_tree`` is false, every preterminal (a subtree whose only
    child is a string) is replaced in its parent by a ``(word, tag)`` tuple.
    """
    rows = zip(
        self._get_column(grid, self._colmap['words']),
        self._get_column(grid, self._colmap['pos']),
        self._get_column(grid, self._colmap['tree']),
    )
    bracket_escapes = {'(': '-LRB-', ')': '-RRB-'}

    pieces = []
    for word, pos_tag, parse_tag in rows:
        word = bracket_escapes.get(word, word)
        pos_tag = bracket_escapes.get(pos_tag, pos_tag)
        left, right = parse_tag.split('*')
        closing = ')' * right.count(')')  # only keep ')'.
        pieces.append('%s (%s %s) %s' % (left, pos_tag, word, closing))
    treestr = ''.join(pieces)

    try:
        tree = self._tree_class.parse(treestr)
    except (ValueError, IndexError):
        tree = self._tree_class.parse('(%s %s)' % (self._top_node, treestr))

    if pos_in_tree:
        return tree

    for subtree in tree.subtrees():
        for i, child in enumerate(subtree):
            is_preterminal = (
                isinstance(child, nltk.Tree)
                and len(child) == 1
                and isinstance(child[0], basestring)
            )
            if is_preterminal:
                subtree[i] = (child[0], child.node)
    return tree
def get_pos_similarity(corpus):
    """Score how similar syntactic categories are, based on their parents.

    For every tree in ``corpus`` the function counts, for each known
    category, how often each known category occurs as its parent node.
    Counts start from a small smoothing constant and are normalised into a
    distribution per category. The similarity of category ``a`` to ``b`` is
    the negated relative entropy (base 2) of ``a``'s parent distribution
    with respect to ``b``'s.

    Fixed overrides are then applied: a category scores 20 against itself,
    -100 against the gap symbol ``'VB____'`` and 0 against the wild-cards
    ``'*'`` and ``'XP'``; rows for those three symbols are added as well.

    :param corpus: iterable of trees accepted by ``ParentedTree.convert``.
    :return: nested dict ``similarity[category][other_category] -> score``.
    """
    from math import log

    pos_tags = ['ROOT', 'FRAG', 'SBARQ', 'SBAR', 'SQ', 'S', 'SINV', 'WHNP',
                'NP', 'VP', 'PRT', 'INTJ', 'WHPP', 'PP', 'WHADVP', 'ADVP',
                'WHADJP', 'ADJP', 'NP-SUBJ', 'WP', 'NN', 'NNS', 'PRP',
                'PRP$', 'CD', 'JJ', 'IN', 'VB', 'UH', 'TO', 'VBP', 'WRB',
                'NOT', 'DT', 'RB', 'MD', 'RP', 'VBG', 'POS', 'VBZ', 'CC',
                'VBD', 'COMP', 'EX', 'VBN', 'WDT', 'PDT', 'WP$', 'JJR',
                'NN-SUBJ', 'NNS-SUBJ', 'PRP-SUBJ']
    known_tags = set(pos_tags)  # O(1) membership inside the corpus loop
    smoothing = 0.0000000001    # keeps every probability strictly positive

    # Default values for the frequency and similarity tables.
    pos_frequency_dict = {}
    pos_similarity_dict = {}
    for tag in pos_tags:
        pos_frequency_dict[tag] = {}
        pos_similarity_dict[tag] = {}
        for other in pos_tags:
            pos_frequency_dict[tag][other] = smoothing
            pos_similarity_dict[tag][other] = 0

    # Count, for each subtree, the category of its parent.
    for tree in corpus:
        tree = ParentedTree.convert(tree)
        for subtree in tree.subtrees():
            parent_node = subtree.parent()
            if parent_node is None:
                continue
            current_pos = subtree.label()
            parent_pos = parent_node.label()
            if current_pos in known_tags and parent_pos in known_tags:
                pos_frequency_dict[current_pos][parent_pos] += 1

    # Turn the counts into proportions.
    for counts in pos_frequency_dict.values():
        total = float(sum(counts.values()))
        for parent_pos in counts:
            counts[parent_pos] = counts[parent_pos] / total

    # Relative entropy of the parent distributions for every category pair.
    for current_pos in pos_tags:
        own_dist = pos_frequency_dict[current_pos]
        scores = pos_similarity_dict[current_pos]
        for compare_pos in pos_tags:
            other_dist = pos_frequency_dict[compare_pos]
            relative_entropy = 0
            for parent_pos in pos_tags:
                p = own_dist[parent_pos]
                q = other_dist[parent_pos]
                relative_entropy += p * log(p / q, 2)
            scores[compare_pos] = -relative_entropy
        scores[current_pos] = 20
        scores['VB____'] = -100
        scores['*'] = 0
        scores['XP'] = 0

    # Add in values for the gap position and the wild-card characters.
    pos_similarity_dict['VB____'] = {}
    pos_similarity_dict['*'] = {}
    pos_similarity_dict['XP'] = {}
    for tag in pos_similarity_dict:
        pos_similarity_dict['VB____'][tag] = -100
        pos_similarity_dict['*'][tag] = 0
        pos_similarity_dict['XP'][tag] = 0
    pos_similarity_dict['VB____']['VB____'] = 100
    pos_similarity_dict['*']['*'] = 0
    pos_similarity_dict['XP']['XP'] = 0
    return pos_similarity_dict
def process_tree(tree):
    """Return the trimmed sentence text produced for ``tree``.

    Every subtree is first passed to ``label_node``, which fills the shared
    ``labels`` dict. ``get_trimmed`` then writes the trimmed sentence to a
    file object, and whatever it wrote is returned as a string.
    """
    import tempfile

    # get labels of each subtree
    labels = {}
    for st in tree.subtrees():
        label_node(st, labels)

    # get trimmed sentence
    # An anonymous temporary file replaces the fixed "temp" file that used
    # to be left behind in the working directory and could be clobbered by
    # a concurrent run.
    with tempfile.TemporaryFile(mode='w+') as f:
        get_trimmed(tree, labels, f)
        f.seek(0)
        return f.read()
def build_corpus(tree_bank, flat_structure = True):
    """Collect the sentences of ``tree_bank`` that contain a present-tense verb.

    Each tree is turned into a list of ``[word, label]`` pairs, either flat
    (one pair per subtree that has no nested subtree) or bracketed via
    ``convert_tree``, and then passed through ``filter_sentence``. Results
    equal to ``'bad-tag'`` are dropped. Of the remaining sentences, only
    those with at least one whitespace-separated token ending in ``VBZ`` or
    ``VBP`` are returned.
    """
    sentences = []
    for tree in tree_bank:
        if flat_structure:
            sentence = [
                [node.leaves()[0], node.label()]
                for node in tree.subtrees()
                if len(list(node.subtrees())) == 1
            ]
        else:
            sentence = convert_tree(tree)
        sentence = filter_sentence(sentence)
        if sentence != 'bad-tag':
            sentences.append(sentence)

    verb_suffixes = ('VBZ', 'VBP')
    return [
        sentence
        for sentence in sentences
        if any(token.endswith(verb_suffixes) for token in sentence.split())
    ]
def get_chunksent(tree):
    """derive the shallow chunk information from the full parsed tree

    Returns one ``(word, pos, chunk_tag)`` triple per token of
    ``tree.pos()``. Tokens covered by a base NP (an ``NP`` node with no
    direct ``NP`` child) are tagged ``"B"`` for the first token of that NP
    and ``"I"`` for the others; all remaining tokens are tagged ``"O"``.
    When base NPs overlap, the one visited later in pre-order wins.

    Tokens are assigned by their position in the sentence. Matching them by
    ``(word, pos)`` value, as was done before, mis-tags a token that occurs
    both inside and outside an NP, or at different offsets of two NPs.
    """
    def is_base_np(stree):
        # getattr: bare string leaves have no ``node`` attribute.
        child_labels = [getattr(child, 'node', None) for child in stree]
        return stree.node == "NP" and "NP" not in child_labels

    tagged = tree.pos()
    chunk_sent = [(word, pos, "O") for (word, pos) in tagged]

    def mark(node, start):
        """Tag tokens under ``node`` (first token index ``start``).

        Returns the index just past the last token of ``node``.
        """
        if not hasattr(node, 'node'):
            return start + 1  # a single leaf token
        if is_base_np(node):
            for offset in range(len(node.leaves())):
                word, pos = tagged[start + offset]
                chunk_sent[start + offset] = (
                    word, pos, "B" if offset == 0 else "I")
        position = start
        for child in node:
            position = mark(child, position)
        return position

    mark(tree, 0)
    return chunk_sent
def process_while(self, tree):
    """Execute a while-loop command subtree.

    The subtrees of ``tree`` are searched for nodes carrying a
    boolean-expression label. The last such node seen becomes the loop
    condition, and the search stops at the first one that has a right
    sibling, which becomes the loop body. The body is run through
    ``self.process_command`` for as long as ``self.process_boolean``
    evaluates the condition as truthy.
    """
    bool_symbols = (
        ("boolean_exp_equal", '=='),
        ("boolean_exp_val_true", 'true'),
        ("boolean_exp_not", 'not'),
        ("boolean_exp_not_equal", '!='),
        ("boolean_exp_lessthan", '<'),
        ("boolean_exp_greaterthan", '>'),
        ("boolean_exp_greaterthan_equal", '>='),
        ("boolean_exp_lesserthan_equal", '<='),
        ("boolean_exp_or", 'or'),
        ("boolean_exp_and", 'and'),
        ("boolean_exp_val_false", 'false'),
    )
    boollist = [name for name, _ in bool_symbols]
    booldict = dict(bool_symbols)

    condition = None
    body = ()
    for node in tree.subtrees():
        if node.label() not in boollist:
            continue
        condition = node
        if node.right_sibling() is not None:
            body = node.right_sibling()
            break

    # The condition is re-evaluated before every pass over the body.
    while self.process_boolean(condition, boollist, booldict):
        self.process_command(body)
    return
def whLang(tree):
    """Return the language portion of the parsed command as text.

    A ``Keyword`` subtree whose first leaf is ``'in'`` contributes the
    caption ``'\\n Language '``; every other keyword contributes nothing.
    Each ``TEXT1`` subtree contributes the string form of its leaves.
    Pieces are concatenated in ``tree.subtrees()`` order.
    """
    pieces = []
    for node in tree.subtrees():
        tag = node.label()
        if tag == 'Keyword':
            if node.leaves()[0] == 'in':
                pieces.append('\n Language ')
        elif tag == 'TEXT1':
            pieces.append(str(node.leaves()))
    return ''.join(pieces)
def parseToList(s):
    """Parse command ``s``, print every parse and report whether it is unambiguous.

    Each parse tree returned by ``parse_maverick_command`` is printed,
    followed by one ``<label> = <leaves>`` line per recognised slot subtree.
    A closing banner reports "Not parsed" (no tree), a plain separator
    (at most one tree) or "Ambiguity" (several trees).

    Returns ``True`` only when exactly one parse tree was produced.
    """
    slot_labels = (
        'Intent', 'CommandVerb', 'Contacts', 'ContactPreposition',
        'RepeatPhrase', 'TimePreposition', 'Time', 'SMSInitial', 'SMS',
        'AdditionalCommand',
    )

    results = parse_maverick_command(s)
    if results is None:
        print("**********************Not parsed***********************")
        # Nothing to iterate over; falling through used to raise TypeError.
        return False

    i = 0
    for tree in results:
        i += 1
        print(tree)
        print("**********************Structured Model***********************")
        for subtree in tree.subtrees():
            label = subtree.label()
            if label in slot_labels:
                print(label + " = ", subtree.leaves())
        print("=============================================")

    if i == 0:
        print("====================Not parsed=========================")
    if i <= 1:
        print("=============================================")
    else:
        print("=====================Ambiguity========================")
    return i == 1
def process_for(self, tree):
    """Execute a counted ``for`` command subtree.

    The subtrees of ``tree`` are scanned for the loop variable (first ``id``
    node) and the upper bound (second ``id`` node, resolved with
    ``self.lookup``, or a ``num`` node, converted with ``int``). The right
    sibling of the bound node is taken as the loop body. The loop variable
    is then set, via ``self.update``, to every integer from its current
    value up to and including the bound, running the body with
    ``self.process_command`` each time.

    NOTE(review): the original indentation of this block was lost; the
    nesting below is reconstructed from syntax and should be confirmed
    against the upstream file.
    """
    fenv = {}
    idcount = 0  # number of loop-header operands consumed so far
    var1 = ""
    var2 = ""
    val = 0
    rval = 0
    comm = ()
    for child in tree.subtrees():
        if (child.label() == "id" and idcount == 0):
            # First identifier: the loop variable and its starting value.
            idcount += 1
            for c in child.subtrees():
                if (c.label() != "id"):
                    var1 = c.label()
                    val1 = self.lookup(var1)
                    # NOTE(review): passes `val` (still 0 here), not `val1`
                    # -- confirm this is intended.
                    fenv = self.update_new(fenv, c.label(), val)
        elif (child.label() == "id" and idcount == 1):
            # Second identifier: the upper bound is the variable's value.
            idcount += 1
            for c in child.subtrees():
                if (c.label() != "id"):
                    var2 = c.label()
                    rval = self.lookup(c.label())
                    fenv = self.update_new(fenv, c.label(), rval)
        elif (child.label() == "num" and idcount == 1):
            # Numeric literal as the upper bound.
            idcount += 1
            for c in child.subtrees():
                if (c.label() != "num"):
                    rval = int(c.label())
        if (idcount == 2):
            # Both operands read: the node after the bound is the body.
            comm = child.right_sibling()
            break
    i = var1
    # NOTE(review): `val1` is only bound if an `id` node was found above.
    val = val1
    #print("value 1",val1)
    for val in range(val1, rval + 1, 1):
        #print("For for ")
        self.update(var1, val)
        self.process_command(comm)
        # Re-reading the variable does not influence the iteration: `val`
        # is overwritten by range() on the next pass.
        newval = self.lookup(var1)
        val = newval
def process_boolean(self, tree, blist, bdict):
    """Evaluate a boolean-expression subtree.

    Subtrees of ``tree`` are scanned in order. Every node whose label is in
    ``blist`` sets the pending operator to ``bdict[label]``; a
    ``boolean_exp_not`` node returns at once the negation of its first
    non-``boolean_exp_not`` subtree. The scan stops at the first
    ``expression_t`` node that has a right sibling: that node and its
    sibling are evaluated with ``self.process_expression`` as the operands.

    Returns the result of applying the pending operator to the operands,
    ``True``/``False`` for the literal operators, or ``None`` when the
    operator is not one of the handled symbols.
    """
    op = None
    val1 = None
    val2 = None
    for node in tree.subtrees():
        tag = node.label()
        if tag in blist:
            op = bdict[tag]
            if tag == "boolean_exp_not":
                for inner in node.subtrees():
                    if inner.label() != "boolean_exp_not":
                        return not self.process_boolean(inner, blist, bdict)
        if tag == "expression_t" and node.right_sibling() is not None:
            val1 = self.process_expression(node)
            val2 = self.process_expression(node.right_sibling())
            break

    if op == 'true':
        return True
    if op == 'false':
        return False

    # Thunks keep evaluation lazy: only the selected comparison is ever
    # executed, exactly as with an if/elif chain.
    evaluators = {
        '==': lambda: val1 == val2,
        '!=': lambda: val1 != val2,
        '<': lambda: val1 < val2,
        '<=': lambda: bool(val1 <= val2),
        '>': lambda: val1 > val2,
        '>=': lambda: val1 >= val2,
        'or': lambda: val1 or val2,
        'and': lambda: val1 and val2,
    }
    evaluate = evaluators.get(op)
    if evaluate is None:
        return None
    return evaluate()
def convert_tree(tree):
    """Flatten ``tree`` into a bracketed list of ``[token, label]`` pairs.

    Subtrees are visited in ``subtrees()`` order. A subtree that contains
    further subtrees emits ``['[', label]`` and is remembered as open; any
    other subtree emits ``[first_leaf, label]`` and is recorded as closed.
    After each step, every open node for which ``close_check`` accepts its
    descendants' positions against the closed positions emits
    ``[']', label]`` and is closed too.

    NOTE(review): the original indentation of this block was lost; the
    nesting below is reconstructed from syntax and should be confirmed
    against the upstream file.
    """
    tree = ParentedTree.convert(tree)
    subtrees = [x for x in tree.subtrees()]
    open_nodes, closed_nodes, new_tree = [], [], []
    for subtree in subtrees:
        sub_subtrees = [x for x in subtree.subtrees()]
        if len(sub_subtrees) > 1:
            # Inserted at the front, so the innermost open node is checked
            # (and closed) first.
            open_nodes.insert(0,subtree.treeposition())
            new_tree.append(['[', subtree.label()])
        else:
            new_tree.append([subtree.leaves()[0], subtree.label()])
            closed_nodes.append(subtree.treeposition())
        for node in open_nodes:
            # Positions of all proper descendants of this open node.
            sub_nodes = [x.treeposition() for x in tree[node].subtrees() if x is not tree[node]]
            if close_check(sub_nodes, closed_nodes):
                new_tree.append([']', tree[node].label()])
                closed_nodes.append(node)
        # Drop the nodes that were just closed from the open list.
        for node in closed_nodes:
            if node in open_nodes:
                open_nodes.remove(node)
    return new_tree
def subject_tag(tree):
    """Mark subject noun phrases and their head nouns in ``tree``.

    An ``NP`` counts as a subject when its right sibling is a ``VP`` and it
    sits directly under ``S`` or ``SQ`` (or has ``SQ`` as grandparent), as
    judged by ``is_parent`` / ``is_grandparent``. Each subject is relabelled
    ``NP-SUBJ``. Inside it, nominal preterminals (``NN``, ``NNS``, ``PRP``,
    ``NNP``, ``NNPS``) are relabelled with a ``-SUBJ`` suffix when they are
    a child of the subject, or a grandchild that is not directly followed
    by a possessive marker (``POS``). Proper nouns collapse onto the common
    noun tags (``NNP`` -> ``NN-SUBJ``, ``NNPS`` -> ``NNS-SUBJ``).

    Returns the relabelled tree converted back to a plain ``Tree``.
    """
    tree = ParentedTree.convert(tree)

    subjects = []
    for subtree in list(tree.subtrees()):
        if subtree.right_sibling() is None:
            continue
        if (subtree.label() == 'NP'
                and subtree.right_sibling().label() == 'VP'
                and (is_parent('S', subtree)
                     or is_parent('SQ', subtree)
                     or is_grandparent('SQ', subtree))):
            subjects.append(subtree)

    subj_heads = ('NN', 'NNS', 'PRP', 'NNP', 'NNPS')
    proper_to_common = {'NNP': 'NN-SUBJ', 'NNPS': 'NNS-SUBJ'}
    for subject in subjects:
        subject.set_label('NP-SUBJ')
        for preterminal in subject.subtrees():
            label = preterminal.label()
            if label not in subj_heads:
                continue
            if is_parent('NP-SUBJ', preterminal):
                subj_head = True
            elif is_grandparent('NP-SUBJ', preterminal):
                # Compare the sibling's *label* with 'POS'. Comparing the
                # sibling node itself to the string was always unequal, so
                # possessors ("the dog 's owner") were tagged as heads.
                sibling = preterminal.right_sibling()
                followed_by_pos = (
                    isinstance(sibling, Tree) and sibling.label() == 'POS')
                subj_head = not followed_by_pos
            else:
                subj_head = False
            if subj_head:
                preterminal.set_label(
                    proper_to_common.get(label, label + '-SUBJ'))
    return Tree.convert(tree)
def iterate_and_update(self, tree):
    """Read a declaration/assignment subtree and store the variable.

    Subtrees of ``tree`` are scanned in order. An ``id`` node supplies the
    variable name (the label of its last non-``id`` subtree). The scan ends
    at the first value node: ``value_num`` yields an ``int``,
    ``value_string`` a ``str`` and ``expression_t`` the result of
    ``self.process_expression``. If a variable name was found, it is stored
    with ``self.update(name, value)``; the value is ``None`` when no value
    node was reached.
    """
    val = None
    var = ""
    for node in tree.subtrees():
        tag = node.label()
        if tag == "id":
            for inner in node.subtrees():
                if inner.label() != "id":
                    var = inner.label()
        elif tag == "value_num":
            for inner in node.subtrees():
                if inner.label() not in ("num", "value_num"):
                    val = int(inner.label())
            break
        elif tag == "value_string":
            for inner in node.subtrees():
                if inner.label() not in ("id", "value_string"):
                    val = str(inner.label())
            break
        elif tag == "expression_t":
            val = self.process_expression(node)
            break

    if var != "":
        self.update(var, val)
def PrintResult(tree):
    """Print every recognised slot of the parsed command ``tree``.

    For each subtree whose label is one of the known slot names, a line
    ``<label> = `` followed by the subtree's leaves is printed. A separator
    line is printed once all subtrees have been visited.
    """
    slot_labels = (
        'Intent', 'CommandVerb', 'Contacts', 'ContactPreposition',
        'RepeatPhrase', 'TimePreposition', 'Time', 'SMSInitial', 'SMS',
        'AdditionalCommand',
    )
    for node in tree.subtrees():
        tag = node.label()
        if tag in slot_labels:
            # The printed caption is the label itself plus " = ".
            print(tag + " = ", node.leaves())
    print("=============================================")
def PrintResult(tree):
    """Print every recognised slot of the parsed command ``tree``.

    For each subtree whose label is a known slot, the slot's caption and
    the subtree's leaves are printed; ``CommandVerb`` and ``NewVerb`` share
    one caption. A separator line is printed once all subtrees have been
    visited.
    """
    # Caption printed before the leaves, keyed by subtree label.
    captions = {
        'CommandVerb': "CommandVerb = ",
        'NewVerb': "CommandVerb = ",
        'Contact': "Receiver = ",
        'Intent': "Intent =",
        'Content': "Content = ",
        'Time': "Time = ",
        'AdditionalSoundSlot': "AdditionalSound = ",
        'AdditionalNotifySlot': "AdditionalNotify = ",
        'Frequency': "Frequency = ",
        'Initial': "Initial = ",
    }
    for node in tree.subtrees():
        caption = captions.get(node.label())
        if caption is not None:
            print(caption, node.leaves())
    print("=============================================")
    # Tail of a function whose ``def`` line lies outside this view
    # (presumably parse_maverick_command -- confirm): build a
    # recursive-descent parser over the grammar, split the command on
    # whitespace and return the parser's result for those tokens.
    maverick_nlu_parser = nltk.RecursiveDescentParser(local_maverick_grammar)
    command_tokens = command.split()
    return maverick_nlu_parser.parse(command_tokens)


# Demo script: parse one sample command and, for every parse tree returned,
# print its running number followed by the slots found in it.
# NOTE(review): the original indentation was lost; nesting is reconstructed
# from syntax.
i = 1
results = parse_maverick_command(
    "please send sms at 9 pm Tomorrow to Hassan body take your medicine say it loudly"
)
for tree in results:
    print(i)
    i += 1
    print("=============================================")
    for subtree in tree.subtrees():
        if subtree.label() == 'PoliteExpression':
            print("PoliteExpression = ", subtree.leaves())
        elif subtree.label() == 'CommandVerb':
            print("CommandVerb = ", subtree.leaves())
        elif subtree.label() == 'Intent':
            print("Intent = ", subtree.leaves())
        elif subtree.label() == 'TimeSentence':
            print("TimeSentence = ", subtree.leaves())
        elif subtree.label() == 'ContactsSentence':
            print("ContactsSentence = ", subtree.leaves())
        elif subtree.label() == 'BodySentence':
            print("BodySentence = ", subtree.leaves())
        elif subtree.label() == 'AdditionalCommand':
            print("AdditionalCommand = ", subtree.leaves())
    print("=============================================")
def main():
    """Interactive loop: read a statement, tag and parse it, run the action.

    Each pass reads one line, POS-tags it with ``Viterbi``, patches the tags
    using application, file and directory names, builds a CFG from a fixed
    rule set plus the per-token tags, parses the tokens and dispatches on
    the verb found (open/close, create, copy/move, change wallpaper) or on a
    counting query. The loop ends when the user enters a farewell word.

    NOTE(review): the original indentation and the line breaks inside the
    grammar string were lost; both are reconstructed from syntax and should
    be confirmed against the upstream file. The code relies on Python 2
    (``raw_input``, ``xrange``, ``map`` returning a list).
    """
    while 1 == 1 :
        print("Enter a statement")
        statement = raw_input().strip()
        if statement == '':
            continue
        if statement.lower() in ['bye','goodbye','tata','good-bye']:
            print("Good-bye, dear human")
            exit()
        userNameLoader() #loads the username
        tagged_arr = Viterbi(statement)
        tokens = word_tokenize(statement)
        isFile = False
        isDir = False
        #check if all of the elements are same
        # tagged_arr is read from index 1 throughout: the tag of tokens[i]
        # is tagged_arr[i+1].
        count = 1
        tag = tagged_arr[1]
        for i in range(2,len(tagged_arr)):
            if tagged_arr[i] == tag:
                count = count + 1
        if count == len(tagged_arr)-1:
            # Every token received the same tag: re-tag each token alone.
            n = len(tokens)
            for i in range(0,n):
                tag_temp = Viterbi(tokens[i])[1]
                tagged_arr[i+1] = tag_temp
        # Force 'folder'/'file'/'directory' to NN when next to a verb.
        # NOTE(review): whether the `elif` pairs with the outer or the inner
        # `if` cannot be recovered from the source -- confirm.
        for i in range(0,len(tokens)):
            if i+2 <= len(tokens):
                if tokens[i] in ['folder','file','directory'] and tagged_arr[i+2] in ['VB','VBN']:
                    tagged_arr[i+1] = 'NN'
            elif tokens[i] in ['folder','file','directory'] and tagged_arr[i] in ['VB','VBN']:
                tagged_arr[i+1]='NN'
        # Re-tag application, directory and file names as 'AN'.
        for i in range (0,len(tokens)):
            if tagged_arr[i+1] in ['NN','NNS','NP','VB','AN','JJ'] and tokens[i]!= 'open':
                for j in range(0,len(appnames)):
                    if tokens[i].lower() in appnames[j] and tokens[i].lower() not in ['file','folder','directory','copy','videos','desktop']:
                        tagged_arr[i+1]='AN'
                        # Replace the spoken name by the command at the same
                        # index of `commands`.
                        tokens[i] = commands[j]
                        isFile = True
                        break
                if isDirName(userName,tokens[i])==True:
                    tagged_arr[i+1] = 'AN'
                    isDir = True
                elif isFileName(userName,tokens[i])==True:
                    tagged_arr[i+1] = 'AN'
                    isFile = True
        # Only the first known verb or counting noun is re-tagged.
        for i in range (0,len(tokens)):
            if tokens[i] in verbList:
                tagged_arr[i+1] = 'VB'
                break
            elif tokens[i] in ['words','lines']:
                tagged_arr[i+1] = 'NNS'
                break
        #print(tagged_arr)
        # Fixed part of the grammar; NAME and the per-token lexical rules
        # are appended below.
        grammar_string = """
        S -> NPP VP
        S -> VP
        NPP -> MODAL PRONOUN | NOUN VA | APPNAME
        NPP -> DET FOLDER VERB NAME | FOLDER VERB NAME| FOLDER NAME | DET NAME
        NPP -> DET JJ FOLDER VERB NAME | JJ FOLDER VERB NAME| JJ FOLDER NAME
        NPP -> DET AN FOLDER VERB NAME | AN FOLDER VERB NAME| AN FOLDER NAME
        NPP -> DET APPNAME
        NPP -> BACK TONAME | DET BACK TONAME
        NPP -> WQUERY
        WQUERY -> WQL AP NOUN | WRB AP NOUN
        BACK -> 'background' | 'BACKGROUND' | 'Background'
        BACK -> 'wallpaper' | 'WALLPAPER' | 'Wallpaper'
        BACK -> AN
        TONAME -> TO FILENAME | TO DET FILENAME
        CPY -> DET FILENAME SOURCE DESTINATION | DET FILENAME DESTINATION SOURCE
        CPY -> FILENAME SOURCE DESTINATION | FILENAME DESTINATION SOURCE
        SOURCE -> IN SOURCER
        SOURCER -> DET FOLDER VBN APPNAME | DET FOLDER APPNAME | DET APPNAME
        SOURCER -> FOLDER VBN APPNAME | FOLDER APPNAME | APPNAME
        DESTINATION -> TO DESTINATIONR
        DESTINATIONR -> DET FOLDER VBN APPNAME | DET FOLDER APPNAME | DET APPNAME
        DESTINATIONR -> FOLDER VBN APPNAME | FOLDER APPNAME | APPNAME
        FOLDER -> 'folder'|'directory'|'file'|'Folder'|'File'|'Directory'|'FOLDER'|'FILE'|'DIRECTORY'
        FOLDER -> NN
        VP -> VERB NPP | VERB VP | ADVERB VP | VERB CPY
        VP -> BER RB IN PPS
        PPS -> DET PP | PP
        PP -> JJ NOUN | NOUN | FOLDER VBN DET FILENAME | FOLDER VBN FILENAME | FOLDER FILENAME | FOLDER DET FILENAME
        PP -> FILENAME
        MODAL -> MD
        PRONOUN -> PPSS | PPO
        VA -> VERB APPNAME
        APPNAME -> AN
        VERB -> VB | VBN
        ADVERB -> RB
        DET -> AT
        NOUN -> NN | NP | NNS
        FILENAME -> AN
        """
        # NAME may expand to any tag seen in this statement.
        # (`str` shadows the builtin for the rest of this iteration.)
        str = 'NAME -> '
        for i in range(1,len(tagged_arr)):
            str+=tagged_arr[i]
            if i < len(tagged_arr)-1:
                str+=" | "
        str+="\n"
        grammar_string += str
        #add POS tags
        tl = len(tagged_arr)
        for i in range(1,tl):
            # 'file'/'folder'/'directory' are skipped here; the fixed FOLDER
            # rule above already lists them as terminals.
            if tokens[i-1] not in ['file','folder','directory']:
                grammar_string+=tagged_arr[i]+" -> \'"+tokens[i-1]+"\'\n"
        simple_grammar = CFG.fromstring(grammar_string)
        #print(simple_grammar)
        parser = nltk.ChartParser(simple_grammar)
        json_str = ''
        ANs= []
        ANJSON = []
        VBs = []
        VBJSON = []
        NAMEs= []
        NJSON = []
        CCYs = []
        SOURCEs = []
        DESTs = []
        FILENAMEs = []
        TONAMEs = []
        TONAMEFILEs = []
        PPs = []
        PPANs = []
        WQUERY = []
        OBJ = []
        # Each parse overwrites the previous one, so the values of the last
        # parse tree are the ones used below.
        for tree in parser.parse(tokens):
            #print(tree)
            ANs = list(tree.subtrees(filter=lambda x: x.label()=='AN'))
            VBs = list(tree.subtrees(filter=lambda x: x.label()=='VERB'))
            NAMEs = list(tree.subtrees(filter=lambda x: x.label()=='NAME'))
            # NOTE(review): the grammar above defines 'CPY', not 'CCY';
            # CCYs is not read anywhere in this function.
            CCYs = list(tree.subtrees(filter=lambda x:x.label()=='CCY'))
            SOURCEs = list(tree.subtrees(filter=lambda x:x.label()=='SOURCER'))
            SOURCEs = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='AN')), SOURCEs)
            DESTs = list(tree.subtrees(filter = lambda x:x.label()=='DESTINATIONR'))
            DESTs = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='AN')), DESTs)
            FILENAMEs = list(tree.subtrees(filter = lambda x:x.label()=='FILENAME'))
            FILENAMEs = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='AN')), FILENAMEs)
            TONAMEs = list(tree.subtrees(filter=lambda x:x.label()=='TONAME'))
            TONAMEFILEs = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='AN')), TONAMEs)
            PPs = list(tree.subtrees(filter = lambda x:x.label()=='PP'))
            PPANs = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='AN')), PPs)
            WQUERY = list(tree.subtrees(filter = lambda x:x.label()=='WQUERY'))
            OBJ = map(lambda x: list(x.subtrees(filter=lambda x: x.label()=='NOUN')), WQUERY)
        # Counting query: a PP with a file name plus the queried noun.
        if(len(PPANs)>0):
            PPANs = PPANs[0][0]
            PPANs = tree2json(PPANs)
            OBJ = tree2json(OBJ[0][0])
            obj = OBJ['NOUN'][0]
            nounArr = ['NNS','NP','NN']
            for n in nounArr:
                if n in obj:
                    obj = obj[n]
                    break
            obj = obj[0]
            counter(PPANs['AN'][0],obj)
        for i in xrange(0,len(ANs)):
            ANJSON.append(tree2json(ANs[i]))
        for i in xrange(0,len(VBs)):
            VBJSON.append(tree2json(VBs[i]))
        for i in xrange(0,len(NAMEs)):
            NJSON.append(tree2json(NAMEs[i]))
        # Dispatch on each verb found in the parse.
        for i in xrange(0,len(VBs)):
            verbRoot = VBJSON[i]['VERB']
            if 'VB' in verbRoot[0]:
                if verbRoot[0]['VB'][0] in ['open','close','shut','exit']:
                    if isFile == True:
                        actionSequence(verbRoot[0]['VB'][0],ANJSON,True)
                    elif isDir == True:
                        actionSequence(verbRoot[0]['VB'][0],ANJSON,False)
                elif verbRoot[0]['VB'][0] in ['make','create']:
                    #if isDir == True:
                    # `str` is the local 'NAME -> ...' rule text built above.
                    createSequence(verbRoot[0]['VB'][0],NJSON,str.rstrip('\n'))
                elif verbRoot[0]['VB'][0] in ['copy','cut','move','duplicate']:
                    SOURCEs = tree2json(SOURCEs[0][0])
                    DESTs = tree2json(DESTs[0][0])
                    FILENAMEs = tree2json(FILENAMEs[0][0])
                    cutCopy(verbRoot[0]['VB'][0],FILENAMEs,SOURCEs,DESTs)
                elif verbRoot[0]['VB'][0] in ['change','replace']:
                    changeWallpaper(verbRoot[0]['VB'][0],tree2json(TONAMEFILEs[0][0]))
def process_command(self, tree):
    """Execute the command(s) contained in ``tree``.

    The first subtree that is not a ``multiple_command``/``single_command``
    wrapper becomes the current command. From there the method walks along
    right siblings, dispatching on each command's label: assignment, for,
    while, if/ternary, print, nested command list or bare number. An
    ``empty_command`` node ends the whole method.

    NOTE(review): the original indentation of this block was lost; the
    nesting below is reconstructed from syntax and should be confirmed
    against the upstream file.
    """
    flag = 0
    eflag = 0
    #print("Treeeeee")
    #print(tree.pretty_print())
    for c in tree.subtrees():
        if (c.label() != "multiple_command" and c.label() != "single_command"):
            #print(c.label())
            #print("While in command")
            #print(c.pretty_print())
            # Walk the sibling chain starting at this command.
            while (c is not None):
                flag = 0
                eflag = 0
                #print("Whileeee", c.label())
                if c.label() == "empty_command":
                    c = None
                    eflag = 1  # tells the outer loop to stop as well
                    #print("Breaking")
                    break
                if ((c != None) and c.label() == "comm_assign_expression"):
                    #print("Come in ")
                    var = None
                    val = None
                    flag1 = 0
                    for t in c.subtrees():
                        if (t.label() == "id"):
                            for l in t.subtrees():
                                if (l.label() != "id"):
                                    var = l.label()
                        if (t.label() == "expression_t"):
                            #print("EXPPPPRRRRRRRRR", t.pretty_print())
                            val = self.process_expression(t)
                    # NOTE(review): placed after the scan; the last `id` and
                    # `expression_t` seen win -- confirm against upstream.
                    self.update(var, val)
                    c = c.right_sibling()
                    if (c == None):
                        break
                        #return
                if ((c is not None) and c.label() == "comm_for_identifier"):
                    #print("calling for")
                    self.process_for(c)
                    c = c.right_sibling()
                    if (c == None):
                        break
                if ((c is not None) and c.label() == "comm_while_do"):
                    #print("Calling while")
                    self.process_while(c)
                    c = c.right_sibling()
                    if (c == None):
                        break
                # NOTE(review): `and` binds tighter than `or`, so the None
                # guard only covers the first label test.
                if ((c is not None) and (c.label() == "comm_if_then_else") or (c.label() == "comm_ternary")):
                    #print("calling if ")
                    self.process_if_ternary_conditional(c)
                    c = c.right_sibling()
                    if (c == None):
                        break
                if ((c is not None) and c.label() == "comm_print_expr"):
                    # Print the first printable item found: a string literal
                    # as is, an expression or identifier via self.lookup.
                    for ctree in c.subtrees():
                        if ctree.label() == "str_identifier":
                            for l in ctree.subtrees():
                                if (l.label() != "id" and l.label() != "str_identifier"):
                                    print((l.label()))
                                    flag = 1
                                    break
                        elif ctree.label() == "expression_t":
                            for l in ctree.subtrees():
                                if (l.label() != "id" and l.label() != "expression_t"):
                                    print(self.lookup(l.label()))
                                    flag = 1
                                    break
                        elif ctree.label() == "id":
                            for l in ctree.subtrees():
                                if (l.label() != "id"):
                                    print(self.lookup(l.label()))
                                    flag = 1
                                    break
                        if (flag == 1):
                            break
                    # No None check here: the guards of the following
                    # branches handle a missing sibling.
                    c = c.right_sibling()
                if ((c is not None) and (c.label() == "multiple_command" or c.label() == "single_command")):
                    # Nested command list: recurse.
                    self.process_command(c)
                    c = c.right_sibling()
                    if (c == None):
                        break
                if ((c is not None) and c.label() == "value_num"):
                    for ctree in c.subtrees():
                        if ctree.label() == "num":
                            for l in ctree.subtrees():
                                if (l.label() != "num"):
                                    print(l.label())
                                    break
                            flag = 1
                        if (flag == 1):
                            break
                    c = c.right_sibling()
                    if (c == None):
                        break
            if (eflag == 1):
                #print("Break and return")
                break
    return