예제 #1
0
 def __init__(self):
     """Create the generator and text cleanser and build the dispatch table.

     ``self.cleanse_methods`` maps the generator's similarity identifiers
     (``IBM_SIM``, ``SSK_SIM``, ``PHONETIC_ED_SIM``) to the bound
     ``TextCleanser`` method registered for each of them.
     """
     generator = Generator()
     self.gen = generator
     cleaner = TextCleanser()
     self.cleanser = cleaner
     # (similarity id, cleanse method) pairs, one per supported similarity.
     dispatch = dict((
         (generator.IBM_SIM, cleaner.heuristic_cleanse),
         (generator.SSK_SIM, cleaner.ssk_cleanse),
         (generator.PHONETIC_ED_SIM, cleaner.phonetic_ED_cleanse),
     ))
     self.cleanse_methods = dispatch
예제 #2
0
 def __init__(self):
     """Create the text cleanser, its name-keyed dispatch table, and gold lists.

     ``self.cleanse_methods`` maps the names ``'IBM'``, ``'SSK'`` and
     ``'PHONETIC_ED'`` to the corresponding bound ``TextCleanser`` methods.
     The three ``gold_*`` attributes start out as empty lists.
     """
     cleaner = TextCleanser()
     self.cleanser = cleaner
     method_names = ('IBM', 'SSK', 'PHONETIC_ED')
     handlers = (
         cleaner.ibm_cleanse,
         cleaner.ssk_cleanse,
         cleaner.phonetic_cleanse,
     )
     self.cleanse_methods = dict(zip(method_names, handlers))
     # Three separate list objects: the attributes must not alias each other.
     self.gold_sent_clean, self.gold_word_pairs, self.gold_sent_pairs = [], [], []
예제 #3
0
class CleanserWebService():
    """Service object exposing SSK-based cleansing of a text string."""

    def __init__(self):
        """Instantiate the TextCleanser used by ``clean``."""
        self.tc = TextCleanser()

    @expose
    def clean(self, text):
        """Cleanse ``text`` with ``TextCleanser.ssk_cleanse``.

        Returns the cleansed text when the reported error string is empty;
        otherwise the error string itself is returned in its place.
        """
        # ssk_cleanse yields a 3-tuple; the replacements element is not used.
        cleaned, problem, _replacements = self.tc.ssk_cleanse(text)
        return cleaned if problem == "" else problem
예제 #4
0
class CleanserWebService():
    """Exposes a single ``clean`` endpoint backed by a TextCleanser."""

    def __init__(self):
        """Build the TextCleanser instance and keep it on ``self.tc``."""
        self.tc = TextCleanser()

    @expose
    def clean(self, text):
        """Run the SSK cleanser over ``text``.

        The cleansed text is returned if the error string is empty; any
        non-empty error string is returned instead of the text.
        """
        outcome = self.tc.ssk_cleanse(text)
        # Exactly three values are expected; the third (replacements) is ignored.
        cleaned, problem, _replacements = outcome
        if problem == "":
            return cleaned
        # An error occurred: hand the error text back to the caller.
        return problem
예제 #5
0
 def __init__(self):
     """Instantiate a TextCleanser and store it as ``self.tc``."""
     cleanser = TextCleanser()
     self.tc = cleanser
예제 #6
0
    as input on stdin and outputs normalised strings on stdout. 
    
    Author: Stephan Gouws
    Contact: [email protected]    
"""

from cleanser import TextCleanser
import json
import codecs
import getopt
import sys, time
from random import choice

if __name__ == '__main__':
    # Command-line entry point: read lines from stdin and run each one
    # through a TextCleanser.
    clnsr = TextCleanser()

    text = sys.stdin.readline()
    # readline() returns '' at end of input, which terminates the loop.
    while (text):
        # A line of at most one character (e.g. a bare newline) stops
        # processing altogether.
        if len(text) <= 1:
            break
        # Alternative cleansers, kept for reference:
        #   heuristic cleanser:
        #     cleantext,error,replacements = clnsr.heuristic_cleanse(text, gen_off_by_ones=False)
        #   phonetic edit-distance based similarity function:
        #     cleantext,error,replacements = clnsr.phonetic_ED_cleanse(text, gen_off_by_ones=False)
        # The SSK-based cleanser is the one currently in use:
        cleantext, error, replacements = clnsr.ssk_cleanse(
            text, gen_off_by_ones=False)

        # An error value of "ERROR" is reported on stderr (no newline is
        # written after it).
        if error == "ERROR":
            sys.stderr.write("ERROR")
예제 #7
0
 def __init__(self):
     """Create the TextCleanser instance and keep it on ``self.tc``."""
     text_cleanser = TextCleanser()
     self.tc = text_cleanser
예제 #8
0
    as input on stdin and outputs normalised strings on stdout. 
    
    Author: Stephan Gouws
    Contact: [email protected]    
"""

from cleanser import TextCleanser
import json
import codecs
import getopt
import sys, time
from random import choice

if __name__ == '__main__':
    # Command-line entry point: read lines from stdin and run each one
    # through a TextCleanser.
    clnsr = TextCleanser() 
    
    text=sys.stdin.readline()
    # readline() returns '' at end of input, which terminates the loop.
    while (text):
        # A line of at most one character (e.g. a bare newline) stops
        # processing altogether.
        if len(text)<=1:
            break
        # Alternative cleansers, kept for reference:
        #   heuristic cleanser:
        #     cleantext,error,replacements = clnsr.heuristic_cleanse(text, gen_off_by_ones=False)
        #   phonetic edit-distance based similarity function:
        #     cleantext,error,replacements = clnsr.phonetic_ED_cleanse(text, gen_off_by_ones=False)
        # The SSK-based cleanser is the one currently in use:
        cleantext,error,replacements = clnsr.ssk_cleanse(text, gen_off_by_ones=False)
        
        # An error value of "ERROR" is reported on stderr (no newline is
        # written after it) and the rest of the loop body is skipped.
        if error=="ERROR":
            sys.stderr.write("ERROR")
            # NOTE(review): `text` has not been re-read at this point in the
            # visible code, so this `continue` re-enters the loop with the
            # same line; if the cleanser fails again this never terminates.
            # Confirm against the rest of the loop and read the next line
            # before continuing if so.
            continue