Beispiel #1
0
def preprocess1(org_qstn):
    """Normalize a question string and drop uninformative tokens.

    Each whitespace-separated token has trailing punctuation
    (``. , ! ? ; :``) stripped and is then passed through ``ab.main`` and
    ``spell.main`` in that order. The results are joined and re-split on
    whitespace, and every token that is all digits, or whose lower-cased
    form is in ``qwords`` or ``stop_word``, is discarded.

    NOTE(review): ``ab``, ``spell``, ``qwords`` and ``stop_word`` are
    defined outside this block; ``ab.main`` / ``spell.main`` are presumed
    to take and return ``str`` -- confirm against their definitions.

    Args:
        org_qstn: The raw question text.

    Returns:
        The surviving tokens joined by single spaces.
    """
    normalized = []
    for token in org_qstn.split():
        token = token.rstrip('.,!?;:')
        if not token:
            # Token consisted only of punctuation: nothing to normalize.
            continue
        token = ab.main(token)
        token = spell.main(token)
        normalized.append(token)

    # Re-split because the helpers may hand back more than one word per
    # input token (the original joined and split for the same reason).
    words = " ".join(normalized).split()

    # Build a new list instead of calling list.remove() while iterating:
    # removing during iteration shifts the remaining items left and makes
    # the loop skip the element right after every removal.
    kept = [
        word
        for word in words
        if not (
            word.isdigit()
            or word.lower() in qwords
            or word.lower() in stop_word
        )
    ]
    return " ".join(kept)
Beispiel #2
0
def preprocess1(org_qstn):
    """Normalize a question string and drop uninformative tokens.

    Each whitespace-separated token has trailing punctuation
    (``. , ! ? ; :``) stripped and is then passed through ``ab.main`` and
    ``spell.main`` in that order. The results are joined and re-split on
    whitespace, and every token that is all digits, a question word, or an
    English stop word (per ``stopwords.words('english')``) is discarded.

    NOTE(review): ``ab``, ``spell`` and ``stopwords`` are defined outside
    this block; ``ab.main`` / ``spell.main`` are presumed to take and
    return ``str`` -- confirm against their definitions.

    Args:
        org_qstn: The raw question text.

    Returns:
        The surviving tokens joined by single spaces.
    """
    qwords = (
        'what', 'who', 'when', 'where', 'which', 'do', 'how',
        'why', 'does', ' ', "what's",
    )
    stop_word = set(stopwords.words('english'))

    normalized = []
    for token in org_qstn.split():
        token = token.rstrip('.,!?;:')
        if not token:
            # Token consisted only of punctuation: nothing to normalize.
            continue
        token = ab.main(token)
        token = spell.main(token)
        normalized.append(token)

    # Re-split because the helpers may hand back more than one word per
    # input token (the original joined and split for the same reason).
    words = " ".join(normalized).split()

    # Build a new list instead of calling list.remove() while iterating:
    # removing during iteration shifts the remaining items left and makes
    # the loop skip the element right after every removal.
    kept = [
        word
        for word in words
        if not (
            word.isdigit()
            or word.lower() in qwords
            or word.lower() in stop_word
        )
    ]
    return " ".join(kept)
Beispiel #3
0
#!python
# -*- coding: utf-8 -*-
# (C) 2016-2018 Muthiah Annamalai
import sys

# Resolve a ``reload`` callable for whichever interpreter is running.
# ``imp`` is imported only as the last-resort fallback: importing it
# unconditionally fails on Python 3.12+, where the module was removed.
try:
    reload  # Python 2.7: builtin
except NameError:
    try:
        from importlib import reload  # Python 3.4+
    except ImportError:
        from imp import reload  # Python 3.0 - 3.3

# Call the name resolved above rather than ``imp.reload`` directly, so the
# fallback chain is actually what decides which implementation runs.
reload(sys)
# sys.setdefaultencoding('utf-8')

# This file is part of open-tamil package
import spell

if __name__ == "__main__":
    spell.main()
#!python
# -*- coding: utf-8 -*-
#(C) 2016-2018 Muthiah Annamalai
import sys
# Python 2 only: force the process-wide default string encoding to UTF-8.
# ``sys.setdefaultencoding`` is removed from the ``sys`` namespace at
# interpreter start-up on Python 2, and ``reload(sys)`` brings it back.
# Python 3 has no such attribute (calling it raises AttributeError), so the
# whole step is skipped there.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 -- ``reload`` is a builtin on Python 2
    sys.setdefaultencoding('utf-8')

# This file is part of open-tamil package
import spell

if __name__ == u"__main__":
    spell.main()