def proofread(self):
    """Check ``self.raw`` for spelling/grammar errors via AfterTheDeadline.

    Returns:
        list[dict]: one dict per reported error with keys "type",
        "precontext", "string", and "suggestions", ready to be stored
        or serialized as JSON by the caller.
    """
    # API key for AfterTheDeadline.
    # NOTE(review): hash() is salted per-process on Python 3, so this key
    # is not stable across runs -- confirm the service accepts any key.
    ATD.setDefaultKey(hash("DoubleCheck"))
    # Check the document for grammar and spelling errors.
    errors = ATD.checkDocument(self.raw)
    # Flatten the error objects into plain dicts for storage.
    # (Removed: an unused json.dumps() result and commented-out debug /
    # database-put experiments that were dead code.)
    err2db = [{"type": error.type,
               "precontext": error.precontext,
               "string": error.string,
               "suggestions": error.suggestions}
              for error in errors]
    return err2db
def grammar_score_atd_raw(some_text):
    """Return [(error_string, precontext), ...] for grammar errors in some_text.

    Retries the ATD call with a linearly increasing back-off when the
    service misbehaves, rotating to a fresh random API key each attempt.
    """
    ATD.setDefaultKey("cfgen call " + rand_str(5))
    wait_time = 1
    while True:
        try:
            error_list = ATD.checkDocument(some_text)
            break
        # Bug fix: was a bare `except:`, which also swallows
        # KeyboardInterrupt/SystemExit and made the retry loop
        # impossible to interrupt. Catch Exception instead.
        except Exception:
            print('ATD API is having trouble!')
            print('waiting for: ' + str(wait_time) + ' sec.')
            time.sleep(wait_time)
            ATD.setDefaultKey("cfgen call " + rand_str(5))
            wait_time += 5
    # Keep only grammar errors; other error types are ignored.
    return [(error.string, error.precontext)
            for error in error_list
            if error.type == 'grammar']
def correct_string(self, text, ensure_length=False):
    """Return *text* with ATD spelling corrections applied per token.

    Only errors described as "Spelling" that carry at least one
    suggestion are applied; the first suggestion wins. When
    *ensure_length* is true, only the first word of a multi-word
    suggestion is substituted so the token count is preserved.
    """
    replacements = {}
    for err in list(ATD.checkDocument(text)):
        options = list(err.suggestions)
        if err.description == "Spelling" and options:
            replacements[err.string] = options[0]
    corrected = []
    for token in text.split():
        if token not in replacements:
            corrected.append(token)
            continue
        candidate = replacements[token]
        corrected.append(candidate.split()[0] if ensure_length else candidate)
    return " ".join(corrected)
def main(args=None): parser = argparse.ArgumentParser() parser.add_argument('textfile', help='filename of the text you want to check.') parser.add_argument('--key', help='Your unique API Key') args = parser.parse_args() fname = args.textfile key = args.key if key == None: ATD.setDefaultKey("your API key") else: ATD.setDefaultKey(key) f = open(fname, 'r') text = f.read() errors = ATD.checkDocument(text) for error in errors: print "%s error for: %s **%s**" % (error.type, error.precontext, error.string) print "some suggestions: %s" % (", ".join(error.suggestions), )
# solution use http://www.afterthedeadline.com/api.slp # and https://bitbucket.org/miguelventura/after_the_deadline/ import imp import sys import os.path basepath = os.path.dirname(__file__) sys.path.append(os.path.abspath(os.path.join(basepath, '..', 'modules/ATD'))) import ATD i150 = imp.load_source('i150', os.path.abspath(os.path.join(basepath, '..', 'intermediate/150.py'))) # random key ATD.setDefaultKey("9WdSTQHB2fg43cHVXZIbjJja5xxHzaMAAt4YJAWRykk=") puzzle = i150.readPuzzle() for solution in i150.solve(puzzle, ''): errors = ATD.checkDocument(solution) if len(errors) == 0: print "Solution '%s' is good" % (solution) else: print "Solution '%s' is bad. Reasons:" % (solution) for error in errors: print "%s error for: %s **%s**" % (error.type, error.precontext, error.string) print "some suggestions: %s" % (", ".join(error.suggestions),)
def correct_word(self, word):
    """Return ATD's top spelling suggestion for *word*, or *word* unchanged.

    Only the first error described as "Spelling" with a non-empty
    suggestion list is used.
    """
    for err in ATD.checkDocument(word):
        if err.description == "Spelling":
            candidates = err.suggestions
            if candidates:
                return candidates[0]
    return word
def filter_typos():
    """Consult the ATD webservice to decide which candidate words are typos.

    Reads/writes module-level state: possible_typos, possible_typos_list,
    words_dict, typos_prob, consulted, typos, corrections_from_atd, and
    the *_file pickle checkpoint paths. A word is kept as a typo when ATD
    reports errors for it in BOTH 'es' and 'en'; progress is checkpointed
    to pickle files after every word because the service is slow/flaky.
    """
    global possible_typos_list
    # Flatten candidate typos (keys) and their candidate corrections
    # (values) into one deduplicated list.
    for word in possible_typos:
        possible_typos_list.append(word)
        possible_typos_list += possible_typos[word]
    possible_typos_list = list(set(possible_typos_list))
    # Keep only rare words (corpus frequency < 4) -- frequent words are
    # assumed to be correctly spelled.
    for word in possible_typos_list:
        if word in words_dict.keys():
            if words_dict[word] < 4:
                typos_prob[word] = words_dict[word]
    possible_typos_list = typos_prob.keys()
    print "Consulting After the Deadline webservice"
    ATD.setDefaultKey("dnava2cac04bcd5e1f7b3749e4fc8107f4f72")
    for possible_typo in possible_typos_list:
        # Skip words already checked in a previous (checkpointed) run.
        if possible_typo in consulted:
            continue
        # ATD wants UTF-8 bytes (Python 2 unicode vs str).
        if isinstance(possible_typo,unicode):
            word = possible_typo.encode('UTF-8')
        else:
            word = possible_typo
        # append == True means "still believed to be a typo".
        append = True
        for language in ['es', 'en']:
            # For the English pass, strip accents so ATD-en can match.
            if language == 'en' and isinstance(word.decode('UTF-8'),unicode):
                word = unidecode(word.decode('UTF-8'))
            ATD.setLanguage(language)
            # Throttle: be polite to the free webservice.
            time.sleep(5)
            try:
                errors = ATD.checkDocument(word)
            except Exception as e:
                # On failure: checkpoint progress, back off, retry once.
                pickle.dump( consulted, open( consulted_file, "wb" ) )
                pickle.dump( typos, open( typos_file, "wb" ) )
                time.sleep(30)
                errors = ATD.checkDocument(word)
            if errors:
                for error in errors:
                    if len(error.suggestions) > 0:
                        for correction in error.suggestions:
                            # ATD suggesting the same word (case-insensitively)
                            # means the word is effectively correct.
                            if word.lower() == correction.lower():
                                append = False
                            if isinstance(correction,unicode):
                                # Word matches an accented suggestion once
                                # accents are stripped: record the accented
                                # form as the correction.
                                if word.lower() == unidecode(correction.lower()):
                                    corrections_from_atd[word] = correction
                            # NOTE(review): nesting of this else is ambiguous
                            # in the collapsed source -- it may instead pair
                            # with the isinstance() check above; confirm
                            # against the original file.
                            else:
                                append = False
            # No errors reported: the word is correctly spelled.
            if len(errors) == 0:
                append = False
            # Cleared in any one language -> not a typo; stop checking.
            if not append:
                break
        if append:
            if possible_typo not in typos:
                typos.append(possible_typo)
        consulted.append(possible_typo)
        # Checkpoint after every word so an interrupted run can resume.
        pickle.dump( consulted, open( consulted_file, "wb" ) )
        pickle.dump( typos, open( typos_file, "wb" ) )
        pickle.dump( corrections_from_atd, open( corrections_from_atd_file, "wb" ) )

#ATD webservice doesn't contains common people name or places.
#So I downloaded a dump from geonames.org webservice to cross-validate,
#if a word is really a word or just a typo.
places = get_places_and_names() print "Cross-validating with geonames" print "{0} typos before".format(len(typos)) for word in typos: if word.encode("UTF-8") in places.keys(): typos.remove(word) print "{0} typos after".format(len(typos)) #If a word was deleted by the pass filters, it means that at some point #it was determined to be correct. And if a word is still in typos list #at this point, means that the previous steps failed to determine if #it is a valid, correctly spelled word. for word in possible_typos: if word in typos: typos_dict[word] = possible_typos[word] else: for possible_correction in possible_typos[word]: if possible_correction in typos: if word not in typos_dict.keys(): typos_dict[word] = [] typos_dict[word].append(possible_correction) #Copying corrections from ATD for word in corrections_from_atd: if type(corrections_from_atd[word]) == list: corrections_dict[word] = corrections_from_atd[word][0].capitalize() else: corrections_dict[word] = corrections_from_atd[word].capitalize() #At this point I have a dict called typos_dict with holds words and their #possible correction, but I don't know if the correction is in the key or #in the value, this section of the program determines which is the correction #and which is in the typo and uses the function select_corrections to make #automatic corrections. for word in typos_dict.keys(): if word in typos: for possible_solution in typos_dict[word]: if possible_solution not in typos: if word not in corrections_dict.keys(): if select_corrections(word,possible_solution,manual_corrections): corrections_dict[word] = possible_solution if word in typos_dict.keys(): del typos_dict[word] else: for misspelled in typos_dict[word]: if misspelled in typos: if misspelled not in corrections_dict.keys(): if select_corrections(misspelled,word,manual_corrections): corrections_dict[misspelled] = word if word in typos_dict.keys(): del typos_dict[word] pickle.dump( manual_corrections, open( manual_corrections_file, "wb" ) )
# Keep only short, well-formed sentences: at most 10 tokens, terminal
# punctuation, and a capitalized first character.
lines = sent_tokenize(file_content)
lines = [line for line in lines
         if line != '' and len(word_tokenize(line)) <= 10
         and line[-1] in '.?!' and line[0].isupper()]
print(len(lines))
wrong_lines_count = 0
pic_count = 0
for i, line in enumerate(lines):
    # Stop after collecting 5 sentences that contain errors.
    if wrong_lines_count == 5:
        break
    print('Original line: ' + line)
    tree = next(parser.raw_parse(line))
    # Bug fix: the original wrote `word_tokenize(line) == 10`, comparing
    # the token LIST to an int -- always False, so no parse pictures were
    # ever written. Compare the token count instead.
    if pic_count < 5 and len(word_tokenize(line)) == 10:
        filename = get_valid_filename(line)
        TreeView(tree)._cframe.print_to_file(filename + '.ps')
        pic_count += 1
    errors = ATD.checkDocument(line)
    if len(list(errors)) == 0:
        print('**No errors** ({}/{})'.format(i + 1, len(lines)))
        continue
    else:
        print()
    # Show the original and corrected parse trees side by side.
    correct_line = correct(line, errors)
    tree.pretty_print()
    print('Correct line: ' + correct_line)
    correct_tree = next(parser.raw_parse(correct_line))
    correct_tree.pretty_print()
    wrong_lines_count += 1
print('Number of wrong sentences: {}'.format(wrong_lines_count))
ATD.setDefaultKey("GEC_f5d029b602c7b98794329473432") #sentences = ["Looking too the water. Fixing your writing typoss.", # "THis sentence is corrupt. Or is it? Who knows, really? Please test it out. Bruno is an good dog. Good dogs is rare."] if len(sys.argv) < 2: print "Need input filename" sys.exit(1) filename = sys.argv[1] f = open(filename, "r") sentences = f.readlines() for s in sentences: errors = ATD.checkDocument(s) s_unicode = s.decode('utf-8') s_corr = s_unicode.encode('utf-8') s_corr = s_corr.rstrip() for error in errors: print >> sys.stderr, ("%s error for: %s **%s**. Description: %s" % (error.type, error.precontext, error.string, error.description)).encode('utf-8') if not "Diacritical" in error.description: print >> sys.stderr, ( "some suggestions: %s" % (", ".join(error.suggestions), )).encode('utf-8') if error.suggestions: s_corr = s_corr.replace(error.string.encode('utf-8'), error.suggestions[0].encode('utf-8')) print s_corr
# Minimal ATD smoke test: run one known-bad sentence through the checker
# and dump the reported errors.
import ATD

ATD.setDefaultKey("paraphrase#thisisit")
reported = ATD.checkDocument("This is are apple")
print(list(reported))