Exemple #1
0
def grammar_score_atd_raw(some_text):
    """Return the grammar errors After the Deadline reports for ``some_text``.

    The ATD request is retried until it succeeds. After each failure the
    function sleeps for the current wait time, registers a fresh random key
    and lengthens the next wait by five seconds.

    Args:
        some_text: Text handed to ``ATD.checkDocument``.

    Returns:
        A list of ``(error.string, error.precontext)`` tuples, one for every
        reported error whose ``type`` is ``'grammar'``.
    """
    ATD.setDefaultKey("cfgen call " + rand_str(5))

    wait_time = 1
    while True:
        try:
            error_list = ATD.checkDocument(some_text)
            break
        except Exception:
            # Fixed: this used to be a bare ``except:``, which also trapped
            # KeyboardInterrupt and SystemExit and made the endless retry
            # loop impossible to interrupt.
            print('ATD API is having trouble!')
            print('waiting for: ' + str(wait_time) + ' sec.')
            time.sleep(wait_time)
            ATD.setDefaultKey("cfgen call " + rand_str(5))
            wait_time = wait_time + 5

    return [
        (error.string, error.precontext)
        for error in error_list
        if error.type == 'grammar'
    ]
	def proofread(self):
		"""Check ``self.raw`` with After the Deadline and return the findings.

		Returns:
			A list with one dict per reported error, holding the keys
			``"type"``, ``"precontext"``, ``"string"`` and ``"suggestions"``.
		"""
		# Register the key used for the AfterTheDeadline requests.
		ATD.setDefaultKey(hash("DoubleCheck"))

		# Ask the service for grammar and spelling errors in the document.
		reported = ATD.checkDocument(self.raw)

		# Flatten every error object into a plain dict.
		err2db = []
		for item in reported:
			err2db.append({"type":        item.type,
				       "precontext":  item.precontext,
				       "string":      item.string,
				       "suggestions": item.suggestions})

		# The JSON rendering is computed but not used by anything below.
		json_entry = json.dumps(err2db, sort_keys=True, indent=4)
		return err2db
Exemple #3
0
def generate_article_score(content):
    """Summarise the After the Deadline statistics for ``content``.

    Args:
        content: Text handed to ``ATD.stats``.

    Returns:
        A dict with the keys ``error_count``, ``word_count``,
        ``grammar_error_count``, ``spell_error_count`` and
        ``style_error_count``.
    """
    ATD.setDefaultKey(settings.ATD_API_KEY)

    # Running totals per error type, plus the overall total.
    per_type = {'grammar': 0, 'spell': 0, 'style': 0}
    total_errors = 0
    total_words = 0

    for metric in ATD.stats(content):
        kind = metric.type
        if kind in per_type:
            total_errors += metric.value
            per_type[kind] += metric.value
        elif kind == 'stats' and metric.key == 'words':
            # The word count is taken from the 'stats'/'words' metric.
            total_words = metric.value

    return {
        'error_count': total_errors,
        'word_count': total_words,
        'grammar_error_count': per_type['grammar'],
        'spell_error_count': per_type['spell'],
        'style_error_count': per_type['style'],
    }
Exemple #4
0
def check_grammar(text):
    """Count the grammar and spelling figures ATD reports for ``text``.

    Each metric returned by ``ATD.stats`` is converted with ``str`` and
    classified with ``is_subseq``; the first run of digits in that string
    is added to the matching counter.

    Returns:
        ``[grammar_count, spell_count]``.
    """
    ATD.setDefaultKey("ENTER_YOUR_API_KEY")

    rendered_metrics = [str(metric) for metric in ATD.stats(text)]
    totals = {'grammar': 0, 'spell': 0}

    for rendered in rendered_metrics:
        # 'spell' is tested first, exactly as in the if/elif it replaces.
        if is_subseq('spell', rendered):
            label = 'spell'
        elif is_subseq('grammar', rendered):
            label = 'grammar'
        else:
            continue
        totals[label] += int(re.search(r'[\d]+', rendered).group(0))

    return [totals['grammar'], totals['spell']]
Exemple #5
0
def main(args=None):
    """Check a text file with After the Deadline and print every error.

    Args:
        args: Optional list of command-line arguments (without the program
            name). When ``None``, argparse falls back to ``sys.argv[1:]``,
            which is what the function always did before.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('textfile',
                        help='filename of the text you want to check.')
    parser.add_argument('--key', help='Your unique API Key')
    # Fixed: ``args`` used to be ignored because parse_args() was called
    # without it, so main([...]) silently parsed sys.argv instead.
    options = parser.parse_args(args)

    if options.key is None:
        ATD.setDefaultKey("your API key")
    else:
        ATD.setDefaultKey(options.key)

    # Fixed: the file handle was never closed.
    with open(options.textfile, 'r') as f:
        text = f.read()

    for error in ATD.checkDocument(text):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s error for: %s **%s**" % (error.type, error.precontext,
                                           error.string))
        print("some suggestions: %s" % (", ".join(error.suggestions), ))
Exemple #6
0
 def correct_string(self, text, ensure_length=False):
     """Replace misspelled tokens of ``text`` with ATD's first suggestion.

     Args:
         text: String to correct; it is split on whitespace.
         ensure_length: When true, only the first whitespace-separated word
             of a suggestion is used, so the token count stays the same.

     Returns:
         The tokens joined by single spaces, with replacements applied.
     """
     words = text.split()

     # Map each misspelled string to the first suggestion ATD offers.
     replacements = {}
     for err in list(ATD.checkDocument(text)):
         candidates = list(err.suggestions)
         if err.description == "Spelling" and len(candidates) > 0:
             replacements[err.string] = candidates[0]

     def pick(token):
         # Tokens without a recorded replacement pass through unchanged.
         if token not in replacements:
             return token
         fix = replacements[token]
         return fix.split()[0] if ensure_length else fix

     return " ".join([pick(token) for token in words])
# This solution uses http://www.afterthedeadline.com/api.slp
# and https://bitbucket.org/miguelventura/after_the_deadline/

import imp
import sys
import os.path

basepath = os.path.dirname(__file__)
sys.path.append(os.path.abspath(os.path.join(basepath, '..', 'modules/ATD')))

import ATD

# The puzzle code is loaded by file path through imp.load_source.
puzzle_module_path = os.path.abspath(os.path.join(basepath, '..', 'intermediate/150.py'))
i150 = imp.load_source('i150', puzzle_module_path)

# random key
ATD.setDefaultKey("9WdSTQHB2fg43cHVXZIbjJja5xxHzaMAAt4YJAWRykk=")

puzzle = i150.readPuzzle()

# Run every candidate solution through ATD and report what it finds.
for solution in i150.solve(puzzle, ''):
    errors = ATD.checkDocument(solution)
    if len(errors) != 0:
        print("Solution '%s' is bad. Reasons:" % (solution))

        for error in errors:
            print("%s error for: %s **%s**" % (error.type, error.precontext, error.string))
            print("some suggestions: %s" % (", ".join(error.suggestions),))
        continue

    print("Solution '%s' is good" % (solution))
Exemple #8
0
 def correct_word(self, word):
     """Return ATD's first spelling suggestion for ``word``.

     ``word`` itself is returned when no error with the description
     ``"Spelling"`` carries any suggestion.
     """
     first_fixes = (
         issue.suggestions[0]
         for issue in ATD.checkDocument(word)
         if issue.description == "Spelling" and issue.suggestions
     )
     return next(first_fixes, word)
Exemple #9
0
 def __init__(self):
     """Register the key used for After the Deadline requests."""
     api_key = "break-it"
     ATD.setDefaultKey(api_key)
def _dump_pickle(obj, path):
    """Pickle ``obj`` to ``path`` and close the file handle afterwards."""
    with open(path, "wb") as handle:
        pickle.dump(obj, handle)


def filter_typos():
    """Narrow the candidate typos down and derive automatic corrections.

    The function works entirely on module-level state. In order it:

    1. flattens ``possible_typos`` into ``possible_typos_list`` and keeps
       the words whose count in ``words_dict`` is below 4;
    2. checks each remaining word with After the Deadline, first in
       Spanish and then in English, appending words that are flagged in
       both languages without being offered back as a suggestion to
       ``typos``;
    3. removes from ``typos`` every word found in the data returned by
       ``get_places_and_names``;
    4. fills ``typos_dict`` and ``corrections_dict``, using
       ``select_corrections`` to decide which side of each pair is the
       correction;
    5. pickles ``consulted``, ``typos``, ``corrections_from_atd`` and
       ``manual_corrections`` to their respective files.

    The body relies on the Python 2 ``unicode`` type.
    """
    global possible_typos_list

    for word in possible_typos:
        possible_typos_list.append(word)
        possible_typos_list += possible_typos[word]

        # De-duplicated on every pass, as before.
        possible_typos_list = list(set(possible_typos_list))

    for word in possible_typos_list:
        # Direct dict membership avoids building a key list per test.
        if word in words_dict and words_dict[word] < 4:
            typos_prob[word] = words_dict[word]

    possible_typos_list = list(typos_prob.keys())

    print("Consulting After the Deadline webservice")
    ATD.setDefaultKey("dnava2cac04bcd5e1f7b3749e4fc8107f4f72")

    for possible_typo in possible_typos_list:
        if possible_typo in consulted:
            continue

        if isinstance(possible_typo, unicode):
            word = possible_typo.encode('UTF-8')
        else:
            word = possible_typo

        append = True
        for language in ['es', 'en']:
            if language == 'en' and isinstance(word.decode('UTF-8'), unicode):
                word = unidecode(word.decode('UTF-8'))
            ATD.setLanguage(language)
            time.sleep(5)
            try:
                errors = ATD.checkDocument(word)
            except Exception:
                # Save progress before the single retry; a second failure
                # propagates to the caller.
                _dump_pickle(consulted, consulted_file)
                _dump_pickle(typos, typos_file)
                time.sleep(30)
                errors = ATD.checkDocument(word)

            if errors:
                for error in errors:

                    if len(error.suggestions) > 0:

                        for correction in error.suggestions:
                            if word.lower() == correction.lower():
                                append = False
                            if isinstance(correction, unicode):
                                if word.lower() == unidecode(correction.lower()):
                                    corrections_from_atd[word] = correction
            else:
                append = False

            if len(errors) == 0:
                append = False

            if not append:
                break

        if append:
            if possible_typo not in typos:
                typos.append(possible_typo)
        consulted.append(possible_typo)

    _dump_pickle(consulted, consulted_file)
    _dump_pickle(typos, typos_file)
    _dump_pickle(corrections_from_atd, corrections_from_atd_file)

    # The ATD webservice doesn't contain common people or place names,
    # so a dump from the geonames.org webservice is used to cross-validate
    # whether a word is really a word or just a typo.
    places = get_places_and_names()
    print("Cross-validating with geonames")
    print("{0} typos before".format(len(typos)))
    # Fixed: the original called typos.remove() while iterating typos,
    # which skips the element after each removal. The slice assignment
    # filters in place so other references to the list see the result.
    typos[:] = [word for word in typos
                if word.encode("UTF-8") not in places]
    print("{0} typos after".format(len(typos)))

    # If a word was deleted by the previous filters, it was determined to be
    # correct at some point. A word still in the typos list could not be
    # confirmed as a valid, correctly spelled word.
    for word in possible_typos:
        if word in typos:
            typos_dict[word] = possible_typos[word]
        else:
            for possible_correction in possible_typos[word]:
                if possible_correction in typos:
                    if word not in typos_dict:
                        typos_dict[word] = []
                    typos_dict[word].append(possible_correction)

    # Copy the corrections obtained from ATD.
    for word in corrections_from_atd:
        if isinstance(corrections_from_atd[word], list):
            corrections_dict[word] = corrections_from_atd[word][0].capitalize()
        else:
            corrections_dict[word] = corrections_from_atd[word].capitalize()

    # typos_dict maps words to their possible counterparts, but it is not
    # known whether the correction is the key or one of the values. This
    # section decides which is which and uses select_corrections to apply
    # automatic corrections.
    # Iterate over a snapshot of the keys: entries are deleted inside the
    # loop, which raises RuntimeError on Python 3 without the copy.
    for word in list(typos_dict.keys()):
        if word in typos:
            for possible_solution in typos_dict[word]:
                if possible_solution not in typos:
                    if word not in corrections_dict:
                        if select_corrections(word, possible_solution, manual_corrections):
                            corrections_dict[word] = possible_solution
                            if word in typos_dict:
                                del typos_dict[word]
        else:
            for misspelled in typos_dict[word]:
                if misspelled in typos:
                    if misspelled not in corrections_dict:
                        if select_corrections(misspelled, word, manual_corrections):
                            corrections_dict[misspelled] = word
                            if word in typos_dict:
                                del typos_dict[word]

    _dump_pickle(manual_corrections, manual_corrections_file)
Exemple #11
0
import hashlib
import os
from io import open

import ATD
from nltk import word_tokenize, sent_tokenize
from nltk.draw.tree import TreeView
from nltk.parse.corenlp import CoreNLPParser

# The ATD key is the MD5 hex digest of a fixed identifier string.
h = hashlib.md5()
h.update('zpiao1'.encode())
key = h.hexdigest()

ATD.setDefaultKey(key)

parser = CoreNLPParser()

# Fixed: this used the misspelled name ``__filse__``, which raised NameError
# as soon as the module was loaded.
dir_path = os.path.dirname(os.path.realpath(__file__))
# The content path sits in the parent of this file's directory.
parent_dir = os.path.split(dir_path)[0]
filename = os.path.join(parent_dir, 'stackoverflow_content')


def get_valid_filename(s):
    """Build an underscore-joined name from the alphanumeric tokens of ``s``.

    ``s`` is lower-cased and tokenised with ``word_tokenize``; tokens for
    which ``str.isalnum`` is false are dropped.
    """
    kept = []
    for piece in word_tokenize(s.lower()):
        if piece.isalnum():
            kept.append(piece)
    return '_'.join(kept)


def correct(line, errors):
    # NOTE(review): only this first statement is visible in this listing; the
    # rest of the body appears to be cut off. As written, the function copies
    # ``line`` into a local and returns None, and ``errors`` is unused.
    correct_line = line
Exemple #12
0
import ATD
import sys

ATD.setDefaultKey("GEC_f5d029b602c7b98794329473432")

#sentences = ["Looking too the water. Fixing your writing typoss.",
#	"THis sentence is corrupt. Or is it? Who knows, really? Please test it out. Bruno is an good dog. Good dogs is rare."]

# The input filename is the first command-line argument.
if len(sys.argv) < 2:
    print("Need input filename")
    sys.exit(1)

filename = sys.argv[1]
# Fixed: the file was opened and never closed; read all lines and release
# the handle immediately.
with open(filename, "r") as f:
    sentences = f.readlines()

for s in sentences:
    errors = ATD.checkDocument(s)
    s_unicode = s.decode('utf-8')
    s_corr = s_unicode.encode('utf-8')
    s_corr = s_corr.rstrip()
    for error in errors:
        print >> sys.stderr, ("%s error for: %s **%s**. Description: %s" %
                              (error.type, error.precontext, error.string,
                               error.description)).encode('utf-8')
        if not "Diacritical" in error.description:
            print >> sys.stderr, (
                "some suggestions: %s" %
                (", ".join(error.suggestions), )).encode('utf-8')
            if error.suggestions:
                s_corr = s_corr.replace(error.string.encode('utf-8'),
Exemple #13
0
import ATD

# Small demo: register a key, check one sentence and print what ATD reports.
ATD.setDefaultKey("paraphrase#thisisit")
errors = ATD.checkDocument("This is are apple")
reported = list(errors)
print(reported)
# metrics = ATD.stats("Looking too the water. Fixing your writing typoss.")
# print([str(m) for m in metrics])
#
# for error in errors:
#     print ("%s error for: %s **%s**" % (error.type, error.precontext, error.string))
#     print ("some suggestions: %s" % (", ".join(error.suggestions),))
#