def preprocess1(org_qstn):
    """Normalise a question string and drop low-information tokens.

    The question is split on whitespace. Each token has trailing
    punctuation (``.,!?;:``) stripped and is then passed through
    ``ab.main`` followed by ``spell.main`` (both defined outside this
    function; presumably abbreviation expansion and spelling correction --
    confirm against those modules). The processed text is re-split on
    whitespace, and tokens that are all digits, or whose lower-cased form is
    in the module-level ``qwords`` or ``stop_word`` collections, are removed.

    Args:
        org_qstn: The raw question text.

    Returns:
        The remaining tokens joined by single spaces.
    """
    punctuation = ".,!?;:"

    # Build a new list instead of calling ``list.remove`` on the list being
    # iterated: removing during iteration shifts the remaining items left, so
    # the token following every removed one was silently skipped.
    processed = []
    for token in org_qstn.split():
        token = token.rstrip(punctuation)
        if not token:
            # Token consisted only of punctuation; nothing to normalise.
            continue
        token = ab.main(token)
        token = spell.main(token)
        processed.append(token)

    # Re-split because a processed token may contain whitespace.
    candidates = " ".join(processed).split()

    kept = [
        word
        for word in candidates
        if not (
            word.isdigit()
            or word.lower() in qwords
            or word.lower() in stop_word
        )
    ]
    return " ".join(kept)
def preprocess1(org_qstn):
    """Normalise a question string and drop low-information tokens.

    The question is split on whitespace. Each token has trailing
    punctuation (``.,!?;:``) stripped and is then passed through
    ``ab.main`` followed by ``spell.main`` (both defined outside this
    function; presumably abbreviation expansion and spelling correction --
    confirm against those modules). The processed text is re-split on
    whitespace, and tokens that are all digits, question words, or English
    stop-words (from ``stopwords.words('english')``) are removed.

    Args:
        org_qstn: The raw question text.

    Returns:
        The remaining tokens joined by single spaces.
    """
    qwords = ('what', 'who', 'when', 'where', 'which', 'do', 'how',
              'why', 'does', ' ', "what's")
    stop_word = set(stopwords.words('english'))
    punctuation = ".,!?;:"

    # Build a new list instead of calling ``list.remove`` on the list being
    # iterated: removing during iteration shifts the remaining items left, so
    # the token following every removed one was silently skipped.
    processed = []
    for token in org_qstn.split():
        token = token.rstrip(punctuation)
        if not token:
            # Token consisted only of punctuation; nothing to normalise.
            continue
        token = ab.main(token)
        token = spell.main(token)
        processed.append(token)

    # Re-split because a processed token may contain whitespace.
    candidates = " ".join(processed).split()

    kept = [
        word
        for word in candidates
        if not (
            word.isdigit()
            or word.lower() in qwords
            or word.lower() in stop_word
        )
    ]
    return " ".join(kept)
#!python
# -*- coding: utf-8 -*-
# (C) 2016-2018 Muthiah Annamalai
# Launcher script: runs ``spell.main()`` when executed directly.
import sys

# Resolve a ``reload`` callable that works on every interpreter version.
try:
    reload  # Python 2.7
except NameError:
    try:
        from importlib import reload  # Python 3.4+
    except ImportError:
        from imp import reload  # Python 3.0 - 3.3

# Use the ``reload`` resolved above instead of ``imp.reload``: the ``imp``
# module was removed in Python 3.12, so importing it unconditionally made
# this script fail before reaching ``spell.main()``.
reload(sys)
# sys.setdefaultencoding('utf-8')
# This file is part of open-tamil package
import spell

if __name__ == "__main__":
    spell.main()
#!python
# -*- coding: utf-8 -*-
# (C) 2016-2018 Muthiah Annamalai
# Launcher script: runs ``spell.main()`` when executed directly.
import sys

# Resolve a ``reload`` callable that works on every interpreter version.
try:
    reload  # Python 2.7
except NameError:
    try:
        from importlib import reload  # Python 3.4+
    except ImportError:
        from imp import reload  # Python 3.0 - 3.3

reload(sys)
# ``sys.setdefaultencoding`` does not exist on Python 3, so calling it
# unconditionally raised AttributeError there. Only call it where the
# reloaded ``sys`` module actually provides it.
if hasattr(sys, "setdefaultencoding"):
    sys.setdefaultencoding('utf-8')
# This file is part of open-tamil package
import spell

if __name__ == u"__main__":
    spell.main()