def grammar_score_atd_raw(some_text):
    """Check *some_text* with the After the Deadline (ATD) service and
    return its grammar errors.

    Retries on any API failure, waiting 1s initially and 5s longer on each
    retry, rotating to a fresh random API key each time.

    Parameters:
        some_text: text to submit to ATD.

    Returns:
        List of ``(error_string, precontext)`` tuples, one per error whose
        type is ``'grammar'``.
    """
    ATD.setDefaultKey("cfgen call " + rand_str(5))
    wait_time = 1
    while True:
        try:
            error_list = ATD.checkDocument(some_text)
            break
        except Exception:
            # was a bare `except:` — that also swallowed KeyboardInterrupt
            # and SystemExit; Exception keeps the retry behavior for real
            # API failures only.
            print('ATD API is having trouble!')
            print('waiting for: ' + str(wait_time) + ' sec.')
            time.sleep(wait_time)
            # rotate to a new random key in case the old one was throttled
            ATD.setDefaultKey("cfgen call " + rand_str(5))
            wait_time += 5
    # keep only grammar-type errors
    return [(error.string, error.precontext)
            for error in error_list
            if error.type == 'grammar']
def proofread(self):
    """Send ``self.raw`` through After the Deadline and return its errors.

    Returns:
        List of dicts with keys ``type``, ``precontext``, ``string`` and
        ``suggestions`` — one per error reported by ATD.
    """
    # NOTE(review): hash() returns an int, and on Python 3 string hashes are
    # salted per process, so this "key" differs between runs — confirm the
    # intended AfterTheDeadline API key.
    ATD.setDefaultKey(hash("DoubleCheck"))
    # check the document for grammar and spelling errors
    reported = ATD.checkDocument(self.raw)
    err2db = []
    for err in reported:
        err2db.append({
            "type": err.type,
            "precontext": err.precontext,
            "string": err.string,
            "suggestions": err.suggestions,
        })
    # JSON rendering kept for parity with the original (value is unused here;
    # it also validates that the error dicts are JSON-serializable).
    json_entry = json.dumps(err2db, sort_keys=True, indent=4)
    return err2db
def generate_article_score(content):
    """Collect ATD statistics for *content*.

    Returns:
        Dict with ``error_count`` (grammar+spell+style total), ``word_count``,
        and per-category ``grammar_error_count`` / ``spell_error_count`` /
        ``style_error_count``.
    """
    ATD.setDefaultKey(settings.ATD_API_KEY)
    # per-category tallies; keys double as the set of error metric types
    counts = {'grammar': 0, 'spell': 0, 'style': 0}
    total_errors = 0
    word_count = 0
    for metric in ATD.stats(content):
        if metric.type in counts:
            total_errors += metric.value
            counts[metric.type] += metric.value
        elif metric.type == 'stats' and metric.key == 'words':
            word_count = metric.value
    return {
        'error_count': total_errors,
        'word_count': word_count,
        'grammar_error_count': counts['grammar'],
        'spell_error_count': counts['spell'],
        'style_error_count': counts['style'],
    }
def check_grammar(text):
    """Count grammar and spelling errors that ATD reports for *text*.

    Each ATD stats entry is rendered to a string; the first run of digits in
    it is taken as the count for that category.

    Returns:
        ``[grammar_count, spell_count]``.
    """
    ATD.setDefaultKey("ENTER_YOUR_API_KEY")
    metrics = ATD.stats(text)
    spell_count = 0
    grammar_count = 0
    for stat in (str(m) for m in metrics):
        # Guard: a stats line with no digits made the original crash with
        # AttributeError on `.group(0)` of a None match.
        match = re.search(r'\d+', stat)
        if match is None:
            continue
        value = int(match.group(0))
        if is_subseq('spell', stat):
            spell_count += value
        elif is_subseq('grammar', stat):
            grammar_count += value
    return [grammar_count, spell_count]
def main(args=None):
    """CLI entry point: grammar/spell-check a text file via ATD.

    Reads the file named by the ``textfile`` positional argument, sets the
    ATD API key from ``--key`` (or a placeholder default), and prints each
    reported error with its suggestions.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('textfile', help='filename of the text you want to check.')
    parser.add_argument('--key', help='Your unique API Key')
    args = parser.parse_args()
    # was `key == None` — identity comparison is the idiomatic None test
    if args.key is None:
        ATD.setDefaultKey("your API key")
    else:
        ATD.setDefaultKey(args.key)
    # with-statement closes the file even if ATD raises (original leaked it)
    with open(args.textfile, 'r') as f:
        text = f.read()
    errors = ATD.checkDocument(text)
    for error in errors:
        print("%s error for: %s **%s**" % (error.type, error.precontext, error.string))
        print("some suggestions: %s" % (", ".join(error.suggestions),))
def correct_string(self, text, ensure_length=False):
    """Return *text* with ATD spelling suggestions applied token-by-token.

    For every "Spelling" error that carries at least one suggestion, the
    first suggestion replaces the offending token wherever it appears.
    When *ensure_length* is true, only the first word of a multi-word
    suggestion is used so the token count is preserved.
    """
    words = text.split()
    # map: misspelled token -> first ATD suggestion
    replacements = {}
    for err in list(ATD.checkDocument(text)):
        options = list(err.suggestions)
        if err.description == "Spelling" and options:
            replacements[err.string] = options[0]
    corrected = []
    for token in words:
        if token not in replacements:
            corrected.append(token)
        elif ensure_length:
            corrected.append(replacements[token].split()[0])
        else:
            corrected.append(replacements[token])
    return " ".join(corrected)
# solution use http://www.afterthedeadline.com/api.slp # and https://bitbucket.org/miguelventura/after_the_deadline/ import imp import sys import os.path basepath = os.path.dirname(__file__) sys.path.append(os.path.abspath(os.path.join(basepath, '..', 'modules/ATD'))) import ATD i150 = imp.load_source('i150', os.path.abspath(os.path.join(basepath, '..', 'intermediate/150.py'))) # random key ATD.setDefaultKey("9WdSTQHB2fg43cHVXZIbjJja5xxHzaMAAt4YJAWRykk=") puzzle = i150.readPuzzle() for solution in i150.solve(puzzle, ''): errors = ATD.checkDocument(solution) if len(errors) == 0: print "Solution '%s' is good" % (solution) else: print "Solution '%s' is bad. Reasons:" % (solution) for error in errors: print "%s error for: %s **%s**" % (error.type, error.precontext, error.string) print "some suggestions: %s" % (", ".join(error.suggestions),)
def correct_word(self, word):
    """Return ATD's first spelling suggestion for *word*.

    Only errors described as "Spelling" with at least one suggestion are
    considered; if none match, *word* is returned unchanged.
    """
    for issue in ATD.checkDocument(word):
        is_spelling = issue.description == "Spelling"
        if is_spelling and len(issue.suggestions) > 0:
            return issue.suggestions[0]
    return word
def __init__(self):
    """Register the default After the Deadline API key used by later calls."""
    ATD.setDefaultKey("break-it")
def filter_typos():
    """Filter candidate typos by word frequency and the ATD web service.

    Reads and mutates module globals: ``possible_typos`` (word -> candidate
    list), ``possible_typos_list``, ``words_dict`` (word frequencies),
    ``typos_prob``, ``consulted``, ``typos``, ``corrections_from_atd`` and
    their pickle file paths.  Python 2 code (``unicode``, print statements).
    """
    global possible_typos_list
    # Flatten possible_typos (keys plus their candidate lists) into one
    # de-duplicated list.
    for word in possible_typos:
        possible_typos_list.append(word)
        possible_typos_list += possible_typos[word]
    possible_typos_list = list(set(possible_typos_list))
    # Keep only rare words (corpus frequency < 4) as typo candidates.
    for word in possible_typos_list:
        if word in words_dict.keys():
            if words_dict[word] < 4:
                typos_prob[word] = words_dict[word]
    possible_typos_list = typos_prob.keys()
    print "Consulting After the Deadline webservice"
    ATD.setDefaultKey("dnava2cac04bcd5e1f7b3749e4fc8107f4f72")
    for possible_typo in possible_typos_list:
        # Skip words already checked in a previous (pickled) run.
        if possible_typo in consulted:
            continue
        if isinstance(possible_typo,unicode):
            word = possible_typo.encode('UTF-8')
        else:
            word = possible_typo
        append = True  # assume it is a typo until some language validates it
        for language in ['es', 'en']:
            # For the English pass, strip accents before consulting.
            if language == 'en' and isinstance(word.decode('UTF-8'),unicode):
                word = unidecode(word.decode('UTF-8'))
            ATD.setLanguage(language)
            time.sleep(5)  # rate-limit requests to the web service
            try:
                errors = ATD.checkDocument(word)
            except Exception as e:
                # Persist progress, back off, then retry once.
                pickle.dump( consulted, open( consulted_file, "wb" ) )
                pickle.dump( typos, open( typos_file, "wb" ) )
                time.sleep(30)
                errors = ATD.checkDocument(word)
            # NOTE(review): indentation below is reconstructed from a
            # collapsed source line — confirm nesting against the original.
            if errors:
                for error in errors:
                    if len(error.suggestions) > 0:
                        for correction in error.suggestions:
                            # Word equals one of its own suggestions
                            # (case-insensitive) -> it is a real word.
                            if word.lower() == correction.lower():
                                append = False
                            if isinstance(correction,unicode):
                                # Same word modulo accents: remember the
                                # accented form as the correction.
                                if word.lower() == unidecode(correction.lower()):
                                    corrections_from_atd[word] = correction
            else:
                append = False
            if len(errors) == 0:
                append = False
            if not append:
                break
        if append:
            if possible_typo not in typos:
                typos.append(possible_typo)
        consulted.append(possible_typo)
        # Checkpoint all state after each consulted word.
        pickle.dump( consulted, open( consulted_file, "wb" ) )
        pickle.dump( typos, open( typos_file, "wb" ) )
        pickle.dump( corrections_from_atd, open( corrections_from_atd_file, "wb" ) )
    #ATD webservice doesn't contain common people names or places.
    #So I downloaded a dump from the geonames.org webservice to cross-validate
    #whether a word is really a word or just a typo.
# Cross-validate the typo list against a geonames.org dump so that place
# and person names are not treated as typos. (Python 2 script code.)
places = get_places_and_names()
print "Cross-validating with geonames"
print "{0} typos before".format(len(typos))
# NOTE(review): removing from `typos` while iterating it skips the element
# after each removal — some known names may survive this pass; iterating a
# copy (typos[:]) would visit every word. Flagged, not changed here.
for word in typos:
    if word.encode("UTF-8") in places.keys():
        typos.remove(word)
print "{0} typos after".format(len(typos))

#If a word was deleted by the pass filters, it means that at some point
#it was determined to be correct. And if a word is still in typos list
#at this point, means that the previous steps failed to determine if
#it is a valid, correctly spelled word.
for word in possible_typos:
    if word in typos:
        typos_dict[word] = possible_typos[word]
    else:
        # word itself is valid; record which of its candidates are typos
        for possible_correction in possible_typos[word]:
            if possible_correction in typos:
                if word not in typos_dict.keys():
                    typos_dict[word] = []
                typos_dict[word].append(possible_correction)

#Copying corrections from ATD
for word in corrections_from_atd:
    if type(corrections_from_atd[word]) == list:
        corrections_dict[word] = corrections_from_atd[word][0].capitalize()
    else:
        corrections_dict[word] = corrections_from_atd[word].capitalize()

#At this point I have a dict called typos_dict which holds words and their
#possible correction, but I don't know if the correction is in the key or
#in the value; this section of the program determines which is the correction
#and which is the typo and uses the function select_corrections to make
#automatic corrections.
# NOTE(review): indentation below is reconstructed from a collapsed source
# line — confirm the nesting of the `del typos_dict[word]` statements.
for word in typos_dict.keys():
    if word in typos:
        # the key is the typo; look for a valid correction among its values
        for possible_solution in typos_dict[word]:
            if possible_solution not in typos:
                if word not in corrections_dict.keys():
                    if select_corrections(word,possible_solution,manual_corrections):
                        corrections_dict[word] = possible_solution
                if word in typos_dict.keys():
                    del typos_dict[word]
    else:
        # the key is valid; its values that are typos get corrected to it
        for misspelled in typos_dict[word]:
            if misspelled in typos:
                if misspelled not in corrections_dict.keys():
                    if select_corrections(misspelled,word,manual_corrections):
                        corrections_dict[misspelled] = word
                if word in typos_dict.keys():
                    del typos_dict[word]
pickle.dump( manual_corrections, open( manual_corrections_file, "wb" ) )
import hashlib
import os
from io import open

import ATD
from nltk import word_tokenize, sent_tokenize
from nltk.draw.tree import TreeView
from nltk.parse.corenlp import CoreNLPParser

# Derive the ATD API key from a fixed username string (md5 hex digest).
h = hashlib.md5()
h.update('zpiao1'.encode())
key = h.hexdigest()
ATD.setDefaultKey(key)

parser = CoreNLPParser()

# BUG FIX: the original read `__filse__` (a typo), which raises NameError at
# import time; `__file__` is the module's own path.
dir_path = os.path.dirname(os.path.realpath(__file__))
parent_dir = os.path.split(dir_path)[0]
filename = os.path.join(parent_dir, 'stackoverflow_content')


def get_valid_filename(s):
    """Lower-case *s* and join its alphanumeric tokens with underscores."""
    s = s.lower()
    tokens = word_tokenize(s)
    tokens = [token for token in tokens if token.isalnum()]
    return '_'.join(tokens)


def correct(line, errors):
    # NOTE(review): this function appears truncated in the source; only the
    # first statement is visible here.
    correct_line = line
# Batch-check a file of sentences against ATD and emit corrections.
# Python 2 script (uses `print >> sys.stderr` and str/unicode round-trips).
import ATD
import sys

ATD.setDefaultKey("GEC_f5d029b602c7b98794329473432")

#sentences = ["Looking too the water. Fixing your writing typoss.",
#             "THis sentence is corrupt. Or is it? Who knows, really? Please test it out. Bruno is an good dog. Good dogs is rare."]

if len(sys.argv) < 2:
    print "Need input filename"
    sys.exit(1)
filename = sys.argv[1]
f = open(filename, "r")
sentences = f.readlines()
for s in sentences:
    errors = ATD.checkDocument(s)
    # Normalize the raw line: decode/re-encode UTF-8, strip trailing newline.
    s_unicode = s.decode('utf-8')
    s_corr = s_unicode.encode('utf-8')
    s_corr = s_corr.rstrip()
    for error in errors:
        print >> sys.stderr, ("%s error for: %s **%s**. Description: %s" % (error.type, error.precontext, error.string, error.description)).encode('utf-8')
        # Diacritical-mark errors are reported but not applied.
        if not "Diacritical" in error.description:
            print >> sys.stderr, ( "some suggestions: %s" % (", ".join(error.suggestions), )).encode('utf-8')
            if error.suggestions:
                # NOTE(review): the source is truncated here — the second
                # argument of this replace() call is missing in the chunk.
                s_corr = s_corr.replace(error.string.encode('utf-8'),
# Quick smoke test of the ATD API: submit a known-bad sentence and dump the
# errors the service reports.
import ATD

ATD.setDefaultKey("paraphrase#thisisit")
errors = ATD.checkDocument("This is are apple")
found = list(errors)
print(found)