def processQueryToDoImprovedLemmatization(self, posTags):
     """Lemmatize each (word, POS-tag) pair of the query.

     For every pair, the Penn Treebank tag is mapped to a WordNet POS via
     IndexCreation.getWordnetTag; when no mapping exists, the lemmatizer's
     default POS is used.

     :param posTags: iterable of (word, tag) tuples (e.g. from pos_tag)
     :return: list of lemmas, one per input pair, in input order
     """
     lemmatizer = WordNetLemmatizer()
     tagMapper = IndexCreation()
     result = []
     for token, pennTag in posTags:
         mappedTag = tagMapper.getWordnetTag(pennTag)
         # Fall back to the lemmatizer's default POS when the tag is unmapped.
         lemma = (lemmatizer.lemmatize(token) if mappedTag is None
                  else lemmatizer.lemmatize(token, pos=mappedTag))
         result.append(lemma)
     return result
 def getArticleAndWordCount(self, path):
     """Build an ordered index->sentence map for a corpus directory.

     Prints the article count (files under *path*) and total token count,
     and maps each sentence to a key of the form ``A<article>S<sentence>``
     (both 1-based), preserving corpus order.

     :param path: directory containing the article files
     :return: collections.OrderedDict mapping 'AiSj' keys to sentence text
     """
     print("Number of articles:", str(len(os.listdir(path))))
     indexSentenceMap = collections.OrderedDict()
     wordCount = 0
     ic = IndexCreation()
     data = ic.removeArticleTitle(ic.readArticles(path))
     # enumerate(start=1) replaces the range(len(...)) index loops; the
     # generated keys ('A1S1', ...) are identical to the original scheme.
     for i, article in enumerate(data, start=1):
         for j, sentence in enumerate(article, start=1):
             wordCount += len(word_tokenize(sentence))
             indexSentenceMap['A' + str(i) + 'S' + str(j)] = sentence
     print("Number of words in the corpus:", str(wordCount))
     return indexSentenceMap
 def processQueryToExtractImprovisedHolonyms(self, posTags):
     """Collect the first part-holonym for each query word that has one.

     For each (word, tag) pair the first WordNet synset is looked up
     (POS-restricted when the tag maps to a WordNet POS); if that synset
     has part-holonyms, the head word of the first one is recorded.

     :param posTags: iterable of (word, tag) tuples (e.g. from pos_tag)
     :return: list of holonym head words (synset name before the first '.')
     """
     holonyms = []
     # Hoisted out of the loop: the original constructed a fresh
     # IndexCreation per token, which is loop-invariant work.
     ic = IndexCreation()
     for word, tag in posTags:
         wnTag = ic.getWordnetTag(tag)
         if wnTag is not None:
             synsets = wn.synsets(word, pos=wnTag)
         else:
             synsets = wn.synsets(word)
         if synsets:
             # Compute part_holonyms() once instead of twice per hit.
             partHolonyms = synsets[0].part_holonyms()
             if partHolonyms:
                 holonyms.append(partHolonyms[0].name().split('.')[0])
     return holonyms
 def processQueryToExtractImprovisedHeadWord(self, query):
     """Return the (lemma-normalized) head word of *query*.

     The query is dependency-parsed via a CoreNLP server expected at
     http://localhost:9000; the word at the ROOT address is taken as the
     head word, and if WordNet has a synset for it (POS-restricted when
     the tag maps), it is replaced by that synset's head word.

     :param query: the query sentence to parse
     :return: the head word string, or None if no ROOT node word is found
     """
     parser = CoreNLPDependencyParser('http://localhost:9000')
     headWord = None
     parseGraph = list(parser.raw_parse(query))[0]
     allNodes = list(parseGraph.nodes.values())
     # Node 0 is the artificial root; its 'ROOT' dependency holds the
     # address of the sentence's head token.
     rootAddress = list(allNodes[0]['deps']['ROOT'])[0]
     for node in allNodes:
         if node['address'] != rootAddress:
             continue
         headWord = node['word']
         if len(headWord):
             _, pennTag = pos_tag([headWord])[0]
             wnTag = IndexCreation().getWordnetTag(pennTag)
             if wnTag is not None:
                 candidates = wn.synsets(headWord, pos=wnTag)
                 if candidates:
                     # Normalize to the synset's head word, e.g. 'dog.n.01' -> 'dog'.
                     headWord = candidates[0].name().split('.')[0]
         break
     return headWord