def nlp_nlidb(question):
    """Translate a natural-language question into query components.

    Pipeline:
      1. Parse the question (Stanford parser) and extract keywords and
         the question type.
      2. Generalize keywords through the glossary; append the question type.
      3. Apply manually defined rules (answerGenerator) to obtain the
         conditions, target and component value.
      4. Link the resulting words to table entities via the semantic net.

    Returns the tuple
        (allWords, required_values, target, conditions, tables,
         question_type, Proper_Nouns, component_value)
    where required_values is always '' — it is no longer used but kept so
    the return shape stays backward compatible.
    """
    # STEP 1: extract the keywords using the parser.
    tree = stanford_client.to_tree(question)
    top_node = penn_treebank_node.parse(tree)
    extracted_words, Proper_Nouns = my_parser.key_words(top_node, question)
    question_type = my_parser.questionType(top_node)

    # STEP 2: replace words with glossary terms, then tag on the question type.
    uniqueWords = glossary.generalizedKeywords(question, extracted_words)
    uniqueWords.append(question_type)

    # STEP 3: apply manually defined rules.
    # component_value is handled elsewhere; returned so the master branch runs.
    allWords, conditions, target, component_value = answerGenerator(question, uniqueWords)

    # Drop None entries eagerly. A list comprehension (instead of filter())
    # keeps this correct on both Python 2 and 3: on Python 3, filter() is
    # lazy and the `+ list(target)` concatenation below would raise.
    allWords = [x for x in allWords if x is not None]
    allWords = set(allWords + list(target))

    # Create links between allWords and the tables' entities.
    tables = set(semanticNet.tables(allWords))

    # required_values is not used by the system anymore; kept for the
    # stable 8-tuple interface.
    required_values = ''

    debug.debug_statement([allWords, required_values, target, conditions,
                           tables, question_type, Proper_Nouns, component_value])
    return (allWords, required_values, target, conditions, tables,
            question_type, Proper_Nouns, component_value)
def _formatKeyWords(keyWords):
    """Flatten the parser's keyword categories into one list of word strings.

    Parameters
    ----------
    keyWords : dict
        Maps category names ('Nouns', 'Verbs',
        'Adjectives and Propositions', ...) to lists of parse nodes that
        expose a ``.word`` attribute; lists may contain None entries.

    Returns
    -------
    list of str
        Noun, verb and adjective/preposition words, with None entries,
        empty strings and auxiliary 'to be' verb forms removed.
    """
    nouns = []
    verbs = []
    adjs_prpos = []
    to_remove = []

    # Warn when no category produced any keyword at all.
    all_keywords = [item for sublist in keyWords.values() for item in sublist]
    if len(all_keywords) < 1:
        debug.debug_statement("No keywords found!")
    else:
        for n in keyWords['Nouns']:
            if n is not None:
                nouns.append(str(n.word))

        for v in keyWords['Verbs']:
            # Guard first: the original called en.verb.infinitive(v) before
            # the None check, which would crash on a None entry.
            if v is not None:
                word = str(v.word)
                # Remove the auxiliary verb 'to be'.
                # BUG FIX: the original appended the node object `v` to
                # to_remove while `verbs` holds word *strings*, so the
                # `x not in to_remove` filter below never matched and 'be'
                # forms were kept. Store (and look up) the word string.
                if en.verb.infinitive(word) == 'be':
                    to_remove.append(word)
                verbs.append(word)

        for adj in keyWords['Adjectives and Propositions']:
            if adj is not None:
                adjs_prpos.append(str(adj.word))

    verbs = [x for x in verbs if x not in to_remove]

    # Combine all keywords extracted for each category and drop empties.
    extracted_words = [w for w in nouns + verbs + adjs_prpos if w != '']
    return extracted_words