예제 #1
0
	def proofread(self):
		"""Check ``self.raw`` with After the Deadline and return the findings.

		Nothing is persisted here: the list is only returned to the caller.

		Returns:
			A list with one dict per reported error, holding that error's
			``type``, ``precontext``, ``string`` and ``suggestions``.
		"""
		# Our API key for AfterTheDeadline.
		# NOTE(review): hash() of a str is not guaranteed to be stable across
		# interpreter runs when hash randomization is enabled, so the key may
		# differ between processes -- confirm that is acceptable.
		ATD.setDefaultKey(hash("DoubleCheck"))

		# Check the document for grammar and spelling errors.
		errors = ATD.checkDocument(self.raw)

		# Flatten each error object into a plain dict of the four fields.
		err2db = [
			{
				"type": error.type,
				"precontext": error.precontext,
				"string": error.string,
				"suggestions": error.suggestions,
			}
			for error in errors
		]
		return err2db
예제 #2
0
def grammar_score_atd_raw(some_text):
    """Return the grammar errors After the Deadline reports for *some_text*.

    The request is retried until it succeeds. After every failure the
    function sleeps for the current wait time, switches to a freshly
    generated API key and lengthens the next wait by 5 seconds. There is no
    upper bound on the number of attempts.

    Args:
        some_text: The text passed to ``ATD.checkDocument``.

    Returns:
        A list of ``(error.string, error.precontext)`` tuples, one for each
        reported error whose ``type`` is ``'grammar'``.
    """
    ATD.setDefaultKey("cfgen call " + rand_str(5))

    wait_time = 1
    while True:
        try:
            error_list = ATD.checkDocument(some_text)
        except Exception:
            # Catch Exception rather than everything so that Ctrl-C and
            # SystemExit can still break out of this unbounded retry loop.
            print('ATD API is having trouble!')
            print('waiting for: ' + str(wait_time) + ' sec.')
            time.sleep(wait_time)
            ATD.setDefaultKey("cfgen call " + rand_str(5))
            wait_time = wait_time + 5
        else:
            break

    # Keep grammar errors only; every other error type is dropped.
    return [(error.string, error.precontext)
            for error in error_list
            if error.type == 'grammar']
예제 #3
0
 def correct_string(self, text, ensure_length=False):
     """Replace misspelled whitespace-separated tokens of *text*.

     Each error reported by ``ATD.checkDocument`` whose description is
     ``"Spelling"`` and that has at least one suggestion maps its
     ``string`` to the first suggestion. Tokens equal to such a string
     are swapped for the suggestion.

     Args:
         text: The text to correct.
         ensure_length: When true, only the first whitespace-separated
             word of a suggestion is used, so the token count is kept.

     Returns:
         The tokens joined back together with single spaces.
     """
     words = text.split()

     # Map each misspelled string to its first suggested replacement.
     replacements = {}
     for issue in list(ATD.checkDocument(text)):
         candidates = list(issue.suggestions)
         if issue.description != "Spelling" or len(candidates) == 0:
             continue
         replacements[issue.string] = candidates[0]

     corrected = []
     for word in words:
         if word not in replacements:
             corrected.append(word)
         elif ensure_length:
             corrected.append(replacements[word].split()[0])
         else:
             corrected.append(replacements[word])
     return " ".join(corrected)
예제 #4
0
def main(args=None):
    """Check a text file with After the Deadline and print every error.

    Args:
        args: Optional list of command-line arguments. When ``None``,
            argparse falls back to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('textfile',
                        help='filename of the text you want to check.')
    parser.add_argument('--key', help='Your unique API Key')
    # Pass ``args`` through so callers can supply their own argument list.
    options = parser.parse_args(args)

    if options.key is None:
        ATD.setDefaultKey("your API key")
    else:
        ATD.setDefaultKey(options.key)

    # The context manager closes the file even if reading fails.
    with open(options.textfile, 'r') as f:
        text = f.read()

    for error in ATD.checkDocument(text):
        print("%s error for: %s **%s**" % (error.type, error.precontext,
                                           error.string))
        print("some suggestions: %s" % (", ".join(error.suggestions), ))
예제 #5
0
# solution use http://www.afterthedeadline.com/api.slp
# and https://bitbucket.org/miguelventura/after_the_deadline/

import imp
import sys
import os.path

# Make the ATD module that lives under ../modules/ATD importable.
basepath = os.path.dirname(__file__)
atd_dir = os.path.join(basepath, '..', 'modules/ATD')
sys.path.append(os.path.abspath(atd_dir))

# Must come after the sys.path tweak above.
import ATD

# Load ../intermediate/150.py as module ``i150``; its file name is not a
# valid identifier, so a plain import statement cannot be used.
i150_path = os.path.join(basepath, '..', 'intermediate/150.py')
i150 = imp.load_source('i150', os.path.abspath(i150_path))

# random key
ATD.setDefaultKey("9WdSTQHB2fg43cHVXZIbjJja5xxHzaMAAt4YJAWRykk=")

puzzle = i150.readPuzzle()

# Run every candidate solution through After the Deadline and report
# whether it came back without errors.
for solution in i150.solve(puzzle, ''):
	errors = ATD.checkDocument(solution)
	if len(errors) != 0:
		print("Solution '%s' is bad. Reasons:" % (solution))

		for error in errors:
			print("%s error for: %s **%s**" % (error.type, error.precontext, error.string))
			print("some suggestions: %s" % (", ".join(error.suggestions),))
	else:
		print("Solution '%s' is good" % (solution))
예제 #6
0
 def correct_word(self, word):
     """Return the first spelling suggestion for *word*, or *word* itself.

     Only errors whose description is ``"Spelling"`` and that carry at
     least one suggestion are considered; the first such error wins.
     """
     spelling_fixes = (
         issue.suggestions[0]
         for issue in ATD.checkDocument(word)
         if issue.description == "Spelling" and issue.suggestions
     )
     # Fall back to the unchanged word when nothing qualifies.
     return next(spelling_fixes, word)
def _dump_pickle(obj, path):
    """Pickle *obj* to *path*, closing the file even if dumping fails."""
    with open(path, "wb") as handle:
        pickle.dump(obj, handle)


def filter_typos():
    """Narrow the candidate typos down and derive automatic corrections.

    Works entirely on module-level state (``possible_typos``,
    ``words_dict``, ``typos_prob``, ``consulted``, ``typos``,
    ``corrections_from_atd``, ``typos_dict``, ``corrections_dict``,
    ``manual_corrections``) and returns nothing. Steps:

    1. Collect every candidate word and keep those whose ``words_dict``
       value is below 4.
    2. Ask After the Deadline about each candidate not consulted before,
       first in Spanish and then in English.
    3. Drop candidates that appear in the geonames data.
    4. Fill ``typos_dict`` and ``corrections_dict`` and pickle the results.
    """
    global possible_typos_list

    for word in possible_typos:
        possible_typos_list.append(word)
        possible_typos_list += possible_typos[word]
    # De-duplicate once, after everything has been collected.
    possible_typos_list = list(set(possible_typos_list))

    for word in possible_typos_list:
        if word in words_dict and words_dict[word] < 4:
            typos_prob[word] = words_dict[word]

    possible_typos_list = typos_prob.keys()

    print("Consulting After the Deadline webservice")
    ATD.setDefaultKey("dnava2cac04bcd5e1f7b3749e4fc8107f4f72")

    for possible_typo in possible_typos_list:
        if possible_typo in consulted:
            continue

        if isinstance(possible_typo, unicode):
            word = possible_typo.encode('UTF-8')
        else:
            word = possible_typo

        # Stays True only if both languages report errors and no suggestion
        # equals the word itself (ignoring case).
        append = True
        for language in ['es', 'en']:
            if language == 'en' and isinstance(word.decode('UTF-8'), unicode):
                # For the English pass the word is transliterated first.
                word = unidecode(word.decode('UTF-8'))
            ATD.setLanguage(language)
            time.sleep(5)
            try:
                errors = ATD.checkDocument(word)
            except Exception:
                # Save progress, back off and retry once. A second failure
                # propagates to the caller.
                _dump_pickle(consulted, consulted_file)
                _dump_pickle(typos, typos_file)
                time.sleep(30)
                errors = ATD.checkDocument(word)

            if errors:
                for error in errors:
                    for correction in error.suggestions:
                        if word.lower() == correction.lower():
                            append = False
                        if isinstance(correction, unicode):
                            # The suggestion differs from the word only by
                            # characters that unidecode transliterates.
                            if word.lower() == unidecode(correction.lower()):
                                corrections_from_atd[word] = correction
            else:
                # No errors reported: the word is accepted in this language.
                append = False

            if not append:
                break

        if append and possible_typo not in typos:
            typos.append(possible_typo)
        consulted.append(possible_typo)

    _dump_pickle(consulted, consulted_file)
    _dump_pickle(typos, typos_file)
    _dump_pickle(corrections_from_atd, corrections_from_atd_file)

    # The ATD webservice does not contain common names of people or places,
    # so a dump from the geonames.org webservice is used to cross-validate
    # whether a word is really a word or just a typo.
    places = get_places_and_names()
    print("Cross-validating with geonames")
    print("{0} typos before".format(len(typos)))
    # Rebuild the list in place: calling remove() while iterating the same
    # list skips the element that follows every removal.
    typos[:] = [word for word in typos
                if word.encode("UTF-8") not in places]
    print("{0} typos after".format(len(typos)))

    # If a word was deleted by the previous filters, it was determined to be
    # correct at some point. A word still in the typos list at this point
    # could not be confirmed as a valid, correctly spelled word.
    for word in possible_typos:
        if word in typos:
            typos_dict[word] = possible_typos[word]
        else:
            for possible_correction in possible_typos[word]:
                if possible_correction in typos:
                    if word not in typos_dict:
                        typos_dict[word] = []
                    typos_dict[word].append(possible_correction)

    # Copy the corrections obtained from ATD.
    for word, correction in corrections_from_atd.items():
        if isinstance(correction, list):
            correction = correction[0]
        corrections_dict[word] = correction.capitalize()

    # typos_dict now holds words and their possible corrections, but the
    # correction may be either the key or one of the values. This section
    # works out which side is the typo and uses select_corrections to make
    # the automatic corrections.
    # Iterate over a snapshot of the keys because entries are deleted below.
    for word in list(typos_dict.keys()):
        if word in typos:
            for possible_solution in typos_dict[word]:
                if (possible_solution not in typos
                        and word not in corrections_dict
                        and select_corrections(word, possible_solution,
                                               manual_corrections)):
                    corrections_dict[word] = possible_solution
                    if word in typos_dict:
                        del typos_dict[word]
        else:
            for misspelled in typos_dict[word]:
                if (misspelled in typos
                        and misspelled not in corrections_dict
                        and select_corrections(misspelled, word,
                                               manual_corrections)):
                    corrections_dict[misspelled] = word
                    if word in typos_dict:
                        del typos_dict[word]

    _dump_pickle(manual_corrections, manual_corrections_file)
예제 #8
0
    lines = sent_tokenize(file_content)
    lines = [line for line in lines if line != ''
             and len(word_tokenize(line)) <= 10
             and line[-1] in '.?!'
             and line[0].isupper()]
    print(len(lines))
    wrong_lines_count = 0
    pic_count = 0
    for i, line in enumerate(lines):
        if wrong_lines_count == 5:
            break
        print('Original line: ' + line)
        tree = next(parser.raw_parse(line))
        if pic_count < 5 and word_tokenize(line) == 10:
            filename = get_valid_filename(line)
            TreeView(tree)._cframe.print_to_file(filename + '.ps')
            pic_count += 1
        errors = ATD.checkDocument(line)
        if len(list(errors)) == 0:
            print('**No errors** ({}/{})'.format(i + 1, len(lines)))
            continue
        else:
            print()
        correct_line = correct(line, errors)
        tree.pretty_print()
        print('Correct line: ' + correct_line)
        correct_tree = next(parser.raw_parse(correct_line))
        correct_tree.pretty_print()
        wrong_lines_count += 1
    print('Number of wrong sentences: {}'.format(wrong_lines_count))
예제 #9
0
# Script: run every line of the file named on the command line through
# After the Deadline. Diagnostics go to stderr and the corrected line goes
# to stdout.
ATD.setDefaultKey("GEC_f5d029b602c7b98794329473432")

# Sample input kept for manual testing:
#sentences = ["Looking too the water. Fixing your writing typoss.",
#	"THis sentence is corrupt. Or is it? Who knows, really? Please test it out. Bruno is an good dog. Good dogs is rare."]

if len(sys.argv) < 2:
    print("Need input filename")
    sys.exit(1)

filename = sys.argv[1]
# Read all lines up front so the file is closed before any request is made.
with open(filename, "r") as f:
    sentences = f.readlines()

for s in sentences:
    errors = ATD.checkDocument(s)
    # Decoding and re-encoding makes a line that is not valid UTF-8 fail
    # here, before any replacement is attempted.
    s_unicode = s.decode('utf-8')
    s_corr = s_unicode.encode('utf-8').rstrip()
    for error in errors:
        print >> sys.stderr, ("%s error for: %s **%s**. Description: %s" %
                              (error.type, error.precontext, error.string,
                               error.description)).encode('utf-8')
        if "Diacritical" not in error.description:
            print >> sys.stderr, (
                "some suggestions: %s" %
                (", ".join(error.suggestions), )).encode('utf-8')
            if error.suggestions:
                # Replaces every occurrence of the flagged string in the
                # line with the first suggestion.
                s_corr = s_corr.replace(error.string.encode('utf-8'),
                                        error.suggestions[0].encode('utf-8'))
    print(s_corr)
예제 #10
0
import ATD

# Smoke test: send one deliberately ungrammatical sentence to After the
# Deadline and print whatever errors come back.
ATD.setDefaultKey("paraphrase#thisisit")
errors = ATD.checkDocument("This is are apple")
# Materialize the result first so the printed value is a plain list.
found_errors = list(errors)
print(found_errors)
# metrics = ATD.stats("Looking too the water. Fixing your writing typoss.")
# print([str(m) for m in metrics])
#
# for error in errors:
#     print ("%s error for: %s **%s**" % (error.type, error.precontext, error.string))
#     print ("some suggestions: %s" % (", ".join(error.suggestions),))
#