def output_text(request):
    """Django view: grammar-correct the ``q`` query parameter.

    Renders ``index2.html`` with the corrected text under the
    ``result`` context key.

    Fixes over the original:
    - a missing ``q`` parameter no longer crashes ``tool.check(None)``;
    - non-GET requests now render an empty result instead of the view
      implicitly returning ``None`` (which makes Django raise).
    """
    corrected = ''
    if request.method == 'GET':
        # ``q`` may be absent or empty; only run the checker on real text.
        text = request.GET.get('q') or ''
        if text:
            tool = grammar_check.LanguageTool('en-GB')
            matches = tool.check(text)
            corrected = grammar_check.correct(text, matches)
    return render(request, 'index2.html', {'result': corrected})
def Grammar_test(self):
    """Return the average grammar-error count per prone unit.

    Runs LanguageTool (en-GB) over every entry in ``self.rtokens`` and
    divides the total number of matches by ``self.get_prone_no(...)``.

    Fix over the original: a zero denominator returns 0 instead of
    raising ZeroDivisionError; the tool is only constructed when there
    is something to check.
    """
    totl = self.get_prone_no(self.rtokens)
    if not totl:
        # No prone units: define the error rate as zero rather than crash.
        return 0
    tool = grammar_check.LanguageTool('en-GB')
    errors = 0
    for ts in self.rtokens:
        errors += len(tool.check(ts))
    return errors / totl
def compute_grammar_metric(self):
    """Compute a grammar score for ``self.raw_text``.

    The fraction of (approximate) words NOT flagged by LanguageTool is
    mapped linearly from [0, 1] onto [-5, 5] and returned.
    """
    checker = grammar_check.LanguageTool('en-US')
    error_count = len(checker.check(self.raw_text))
    # Words are approximated by counting spaces (+1 for the last word).
    approximate_number_of_words = self.raw_text.count(" ") + 1
    clean_fraction = ((approximate_number_of_words - error_count)
                      / float(approximate_number_of_words))
    return (2 * clean_fraction - 1) * 5
def count_language_mistakes(self, doc):
    """Return the number of English mistakes LanguageTool finds in *doc*.

    *doc* is decoded as UTF-8 with undecodable bytes replaced.  Any
    failure inside the checker is treated as "no mistakes found" so a
    flaky tool never breaks the caller.
    """
    checker = grammar_check.LanguageTool(self._defaultLanguage)
    text = doc.decode("utf-8", errors='replace')
    try:
        found = checker.check(text)
    except Exception:
        # Best-effort: swallow checker failures and report zero mistakes.
        found = []
    return len(found)
def __init__(self):
    """Initialise the HTML parser with spell- and grammar-checking state.

    Builds an enchant dictionary backed by a personal word list
    (``ignored_words.txt``) so project-specific terms are not flagged as
    misspellings, plus a LanguageTool (en-US) instance for grammar checks.
    """
    HTMLParser.__init__(self)
    # Per-word spell-check results collected while parsing.
    self.__spell_check_res = {}
    # Grammar results; None until populated elsewhere.
    self.__grammar_check_res = None
    # Parser state flags toggled during HTML traversal.
    self.__ignore_tag = False
    self.__is_code_block = False
    self.__in_code_block = False
    self.__dictionary = enchant.DictWithPWL(
        'en_US', 'web-data/mxnet/doc/ignored_words.txt')
    self.__spell_checker = SpellChecker(self.__dictionary)
    # Accumulates the text content seen so far.
    self.__parsed_content = ""
    self.__grammar_checker = grammar_check.LanguageTool('en-US')
def get_metrics (doc): global grammar_tool # initialize dict metrics = [ 'syllables', 'words', 'spelling_errors', 'grammar_errors', 'sentences' ] res = { metric: 0 for metric in metrics } # initial parse sentences = get_sentences(doc) # words = [] # get metrics num_sentences = len(sentences) res['sentences'] = num_sentences for sentence in sentences: try: try: res['grammar_errors'] += len(grammar_tool.check(sentence)) except Exception as e: print "grammar tool failed: {}".format(e) print "reinitializing grammar tool.." grammar_tool = grammar_check.LanguageTool('en-US') time.sleep(0.1) words_for_sentence = get_words(sentence) res['words'] += len(words_for_sentence) # words.append(words_for_sentence) for word in words_for_sentence: try: # handle trailing punctuation for spellchecker if word[-1] in string.punctuation: word = word[:-1] res['syllables'] += count_syllables(word) if not spelling_tool.check(word): res['spelling_errors'] += 1 except Exception as e: print "inner exception:", e continue except Exception as e: print "outer exception:", e continue if res['words'] == 0: print "discarding...", doc return res #, sentences, words
def grade_for_grammar(essay):
    """Score *essay* for grammatical and lexical ambiguity.

    Splits the essay into sentences on '.', counts sentences flagged by
    LanguageTool (grammatically ambiguous) and sentences where more than
    10% of words have >= 5 WordNet senses (lexically ambiguous), and
    returns a dict with the overall score and both sentence lists.

    Fixes over the original:
    - the result dict was initialised with the misspelled key
      'individual_socre' while the code later wrote 'individual_score',
      leaving a stray zero entry;
    - an essay with no usable sentences returns the zeroed result instead
      of raising ZeroDivisionError.
    """
    result = {
        'score': 0,
        'lexically_ambiguous_sentences': [],
        'gramatically_ambiguous_sentences': [],
        'individual_score': 0
    }
    regex_sentences = [s for s in essay.split('.') if s != ""]
    sentences = []
    for x in regex_sentences:
        # Keep only fragments containing at least one non-space character.
        if re.findall(r"[^\s]", x):
            sentences.append(
                x.replace("\r", "").replace("\n", "").replace("\t", ""))
    print("The sentences are the following: " + str(sentences))
    if not sentences:
        # Nothing to grade; avoid dividing by zero below.
        return result
    tool = grammar_check.LanguageTool('en-US')
    incorrect = 0
    for sentence in sentences:
        matches = tool.check(sentence)
        words = sentence.split()
        lexical_ambiguous_words_count = 0
        if len(matches) > 0:
            incorrect += 1
            result["gramatically_ambiguous_sentences"].append(sentence)
            continue
        for word in words:
            if word != "":
                token = nlp(word)[0]
                if token._.wordnet.synsets():
                    lexeme = nlp.vocab[word]
                    if lexeme.is_stop != True:  # skip stopwords
                        if len(token._.wordnet.synsets()) >= 5:
                            print(str(word) + " has "
                                  + str(len(token._.wordnet.synsets()))
                                  + " meanings.")
                            lexical_ambiguous_words_count += 1
        if (lexical_ambiguous_words_count / len(words)) > 0.1:
            incorrect += 1
            result["lexically_ambiguous_sentences"].append(sentence)
            continue
    result["score"] = (len(sentences) - incorrect) / len(sentences) * 100
    result["individual_score"] = 1 / len(sentences) * 100
    return result
def main():
    """Interactive plot-generator loop (Python 2 code).

    Drives a Plotto session: shows the menu, generates a master plot,
    runs it through LanguageTool (en-US) corrections, and prints the
    plot, conflict and character list each iteration.

    NOTE(review): indentation reconstructed from a collapsed source line —
    the loop body boundaries should be confirmed against the original.
    """
    plotto = Plotto()
    while (plotto.isPlotting()):
        plotto.menu()
        plotto.display()
        plotto.generate()
        # A fresh tool per iteration; corrections applied to the raw plot.
        tool = grammar_check.LanguageTool('en-US')
        masterplot = plotto.masterplot.getPlot()
        plot_checked = grammar_check.correct(masterplot, tool.check(masterplot))
        print capitalize(plot_checked.lower()) + '\n'
        conflict = plotto.conflicts.getConflict()
        characters = plotto.characters.getCharacters()
        print conflict + '\n'
        print 'Characters: '
        for char in characters:
            print char.name + ", " + char.role
        print '\n'
# source: http://stackoverflow.com/a/7160778 # modified so protocol is optional. url_regex = re.compile( r'(^(?:http|ftp)s?://)?' # http:// or https:// (optional) r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain... r'localhost|' # localhost... r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip r'(?::\d+)?' # optional port r'(?:/?|[/?]\S+)$', re.IGNORECASE) space_or_num_regex = re.compile(r'(\d|\s)+') proper_noun_regex = re.compile(r'^([0-9]|[A-Z][a-z0-9]+)') punctuation_table = dict.fromkeys(map(ord, string.punctuation)) grammar_tool = grammar_check.LanguageTool('en-US') spelling_tool = enchant.Dict('en_US') tokenizer = WhitespaceTokenizer() def get_sentences (doc): return sent_tokenize(doc) def get_words (sentence): sentence = sentence.strip() words = [] for token in tokenizer.tokenize(sentence): token = token.decode("utf-8") # remove urls
def setUp(self):
    """Create a default-language LanguageTool instance before each test."""
    self.lang_check = grammar_check.LanguageTool()
import pandas as pd
import nltk
from sklearn import linear_model, svm, neighbors, naive_bayes
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
# UNCOMMENT THIS
# import enchant
import grammar_check
from nltk.tokenize import sent_tokenize
from nltk import word_tokenize, pos_tag, ne_chunk

# Root folder of the opinion-spam data set.
DATA_SET_PATH = "Data Sets/op_spam_v1.4/"
# UNCOMMENT THIS
# SPELLING_DICT = enchant.Dict("en_US")
# Shared grammar checker, built once at import time.
GRAMMAR_CHECK = grammar_check.LanguageTool('en-US')

def main():
    """Load the corpus, build n-gram vocabularies, and featurize.

    NOTE(review): only the baseline featurization path is active; the
    full pipeline and the train/validation/test split are commented out,
    and this definition may be truncated in the visible chunk.
    """
    raw_data = load_data()
    bigram_set = generate_bigram_set(raw_data)
    unigram_set = generate_unigram_set(raw_data)
    # processed_data = featurize_data(raw_data, bigram_set, unigram_set, baseline_flag=False)
    processed_data_baseline = featurize_data(raw_data, bigram_set, unigram_set, baseline_flag=True)
    # processed_data.to_csv("data.csv")
    # training_data = processed_data.sample(frac=0.7)
    # validation_data = processed_data.loc[set(processed_data.index)-set(training_data.index)].sample(frac=0.5)
    # test_data = processed_data.loc[set(processed_data.index)-set(training_data.index)-set(validation_data.index)]
def generate_response(self, sentence, object_name, detail_name, question,
                      p_score, f_score, u_score, s_score, detail_array):
    """Build a natural-language response for *sentence*.

    POS-tags the sentence, buckets tokens into part-of-speech pools,
    scores known objects/details against the knowledge graph, merges
    local and server memory into a word bank, and finally assembles and
    grammar-corrects a response sentence.

    NOTE(review): indentation reconstructed from collapsed source lines —
    nesting (especially inside the two scoring loops and the question /
    statement branches) should be confirmed against the original file.
    The ``detail_array`` parameter is overwritten before use.
    """
    response = ""
    object_score = 0
    detail_score = 0
    statement_score = 0
    question_score = 0
    self.question = question
    # Part-of-speech pools filled by the tagging loop below.
    pronoun_pool = []
    noun_pool = []
    adjectives_pool = []
    connector_pool = []
    verb_pool = []
    past_verb_pool = []
    simple_verb_pool = []
    VRB_pool = []
    rb_pool = []
    MD_pool = []
    WRB_pool = []
    WP_pool = []
    #multi-dimensional array
    word_bank = []
    if self.find_object_node(object_name):
        print("Previous object found!")
        object_score += 1
    # Bucket each (token, tag) pair from the tagger into its pool.
    for entry in self.justTAG(sentence):
        pos = entry[0]
        tag = entry[1]
        if tag == 'NN':  #NOUNS
            noun_pool.append(pos)
            continue
        elif tag == 'NNS':  #NOUNS PLURAL
            noun_pool.append(pos)
            continue
        elif tag == 'NNP':  #unidentified pronoun
            pronoun_pool.append(pos)
            continue
        elif tag == 'JJ':  #ADJECTIVES
            adjectives_pool.append(pos)
            continue
        elif tag == 'VBP':  #VERB
            verb_pool.append(pos)
            continue
        elif tag == 'VRB':
            # "do" in a question is treated as a question word instead.
            if pos == "do" and question:
                WP_pool.append(pos)
            else:
                VRB_pool.append(pos)
            continue
        elif tag == 'VB':
            simple_verb_pool.append(pos)
            continue
        elif tag == 'VBD':
            past_verb_pool.append(pos)
            continue
        elif tag == 'VBG':
            verb_pool.append(pos)
            continue
        elif tag == 'VBZ':
            verb_pool.append(pos)
            continue
        elif tag == 'RB':  #Example : do you STILL like me ?
            rb_pool.append(pos)
            continue
        elif tag == 'MD':
            MD_pool.append(pos)
            continue
        elif tag == 'WRB':
            WRB_pool.append(pos)
            continue
        elif tag == 'WP':
            WP_pool.append(pos)
            continue
        else:
            continue
    print("PRONOUNS " + str(pronoun_pool))
    print("NOUNS " + str(noun_pool))
    print("ADJECTIVES " + str(adjectives_pool))
    print("VERBS " + str(verb_pool))
    print("SIMPLE VERBS" + str(simple_verb_pool))
    print("PAST VERBS " + str(past_verb_pool))
    print("RB " + str(rb_pool))
    print("VRB " + str(VRB_pool))
    print("WRB " + str(WRB_pool))
    print("MD" + str(MD_pool))
    print("WP " + str(WP_pool))
    existing_objects = dict()
    detail_count = 0
    line_num = 0
    detail_array = []
    noun_scores = dict()
    self.global_noun_pool = noun_pool
    # Score known pronouns: base 2, x3 if it is the current object,
    # then scaled by how many details the object node carries.
    for word in pronoun_pool:
        if self.find_object_node(word):
            total_score = 2
            detail_count, line_num, detail_array = self.check_object_details(
                self.get_object_line(word))
            existing_objects[word] = detail_count
            if word == object_name:
                total_score = total_score * 3
            total_score = total_score * detail_count
            noun_scores[word] = total_score
        else:
            continue
    # Same scoring pass for nouns.
    for word in noun_pool:
        print('word:' + word)
        if self.find_object_node(word):
            total_score = 2
            detail_count, line_num, detail_array = self.check_object_details(
                self.get_object_line(word))
            existing_objects[word] = detail_count
            if word == object_name:
                total_score = total_score * 3
            total_score = total_score * detail_count
            noun_scores[word] = total_score
    print("Existing objects" + str(existing_objects))
    local_memory = self.get_local_memory(noun_pool)
    server_memory = self.get_server_memory()
    total_memory = self.get_total_memory(local_memory, server_memory)
    print('LOCAL MEMORY = ' + str(local_memory))
    print('SERVER MEMORY = ' + str(server_memory))
    print('TOTAL MEMORY = ' + str(total_memory))
    generated_word_bank = self.create_word_bank(total_memory)
    print('WORD BANK : ' + str(generated_word_bank))
    if question:
        #get all segments from wrb,md, wp
        banana_split = sentence.split(" ")
        i = len(banana_split)
        banana_max = 0
        target_question = ""
        # Find the first question word present in the sentence.
        for word in banana_split:
            if word in WRB_pool or word in MD_pool or word in WP_pool:
                banana_max = i
                target_question = word
                break
        print("TARGET QUESTION : " + target_question)
        local_memory = self.get_local_memory(noun_pool)
        print('LOCAL MEMORY = ' + str(local_memory))
        # NOTE(review): the question branch returns the still-empty
        # response string.
        return response
    elif not question:
        pronouns = []
        nouns = []
        verbs = []
        adjectives = []
        wrb = []
        final_word_bank = self.apply_weights(generated_word_bank, question,
                                             sentence, object_name, detail_name)
        #segment everything into question words, nouns and verbs
        question_starters = [
            'Did', 'How', 'When', 'Where', 'How', 'Can', 'Is', 'What',
            'Should', 'Could', 'Would'
        ]
        wp = ['Who', 'What']
        wdt = ['Which']
        wp_doll = ['Whose']
        wrb = ['Where', 'When', 'How']
        md = ['Can', 'Could', 'Will', 'Should', 'Would']
        q_connect = ['Was', 'Did', 'Is']
        q_connect_2 = ['An', 'A', 'It']
        q_past = []
        q_present = [wdt]
        q_future = [md]
        q_pro = [wp, wp_doll]
        q_where = [wrb]
        self_perspective = False
        reverse_perspective = False
        sentence_bank = []
        if 'I' in sentence.split(" "):
            self_perspective = True
            pronouns.append('I')
        # Pull each non-empty slot of the weighted word bank into the
        # sentence bank, lemmatizing verb slots 6 and 7.
        if len(final_word_bank[0]) != 0:
            pronouns = final_word_bank[0]
            sentence_bank.append(pronouns)
        if len(final_word_bank[1]) != 0:
            nouns = final_word_bank[1]
            sentence_bank.append(nouns)
        if len(final_word_bank[2]) != 0:
            verbs = final_word_bank[2]
            sentence_bank.append(verbs)
        if len(final_word_bank[3]) != 0:
            adjectives = final_word_bank[3]
            sentence_bank.append(adjectives)
        if len(final_word_bank[6]) != 0:
            for verb in final_word_bank[6]:
                verbs.append(WordNetLemmatizer().lemmatize(verb, 'v'))
            sentence_bank.append(verbs)
        if len(final_word_bank[7]) != 0:
            for verb in final_word_bank[7]:
                verbs.append(WordNetLemmatizer().lemmatize(verb, 'v'))
            # NOTE(review): appends the loop variable ``verb`` (a single
            # string), unlike the parallel branch above which appends
            # ``verbs`` — looks like a typo; left as-is.
            sentence_bank.append(verb)
        if len(final_word_bank[9]) != 0:
            wrb = final_word_bank[9]
            sentence_bank.append(wrb)
        print(pronouns)
        print(verbs)
        print(nouns)
        print(adjectives)
        print(wrb)
        # NOTE(review): raises IndexError if any of these pools is empty.
        sentence_RAW = str(pronouns[0] + " " + verbs[0] + " " + nouns[0])
        matches = self.tool.check(sentence_RAW)
        print(matches)
        print(language_check.correct(sentence_RAW, matches))
        sentence_tier2 = []
        for i, question_starter in enumerate(question_starters):
            sentence_tier2.append(question_starters[i] + " " + sentence_RAW)
        print(sentence_tier2)
        # Only the first candidate ("Did ...") is grammar-corrected and used.
        tool = grammar_check.LanguageTool('en-GB')
        matches = tool.check(str(sentence_tier2[0]))
        print(matches)
        response = grammar_check.correct(str(sentence_tier2[0]), matches)
        print(self.get_sentence_tense(self.justTAG(sentence)))
        #assign gravity to each set
        #combine set gravities into composite gravities for noun verb pairs
        #combine set gravities into composite gravities for question noun verb triplets
        #filter out final gravity sequence
    return response
import grammar_check
import xml.etree.ElementTree as etree
import csv
import sys

# Single shared checker, created once at import time.
tool = grammar_check.LanguageTool('en-GB')

def number_grammer_errors(text):
    """Return the number of grammar issues LanguageTool finds in *text*."""
    matches = tool.check(text)
    return len(matches)

def extract_text_only(text):
    # NOTE(review): this definition is truncated in the visible chunk —
    # the character scanner below tracks <p>...</p> regions but its loop
    # body continues past this view.
    ret = ''
    inside_paragraph = False
    level = 0
    i = 0
    culled_text = ""
    while i < len(text):
        if text[i] == '<':
            i = i + 1
            if text[i] == 'p':
                inside_paragraph = True
            elif text[i] == '/':
                i = i + 1
                if text[i] == 'p':
                    inside_paragraph = False
                else:
                    level = level - 1
def grammar(self, essay):
    """Read the essay file at path *essay*, keep its text on
    ``self.essay1``, and print the LanguageTool (en-GB) correction."""
    with open(essay, 'r') as handle:
        self.essay1 = handle.read()
    checker = grammar_check.LanguageTool('en-GB')
    issues = checker.check(self.essay1)
    print(grammar_check.correct(self.essay1, issues))
def do_something(val):
    """Return *val* with LanguageTool's (en-GB) suggested corrections applied."""
    checker = grammar_check.LanguageTool('en-GB')
    issues = checker.check(val)
    corrected = grammar_check.correct(val, issues)
    return corrected
def get_grammar_count(string):
    """Return how many grammar issues LanguageTool (en-GB) finds in *string*."""
    checker = grammar_check.LanguageTool('en-GB')
    matches = checker.check(string)
    return len(matches)
def webook():
    """Facebook Messenger webhook: process incoming messaging events.

    For each message: skips a hard-coded sender ID, optionally runs the
    text through LanguageTool when the sender is in grammar mode
    (``grammarUserID``), dispatches quick-reply / postback payloads, and
    otherwise answers with a definition or the settings menu.

    NOTE(review): indentation reconstructed from collapsed source lines —
    the nesting of the option/postback branches should be confirmed
    against the original file.
    """
    # endpoint for processing incoming messaging events
    #return "Hello world", 200
    data = request.get_json()
    log(data)  # you may not want to log every incoming message in production, but it's good for testing
    if data["object"] == "page":
        for entry in data["entry"]:
            for messaging_event in entry["messaging"]:
                if messaging_event.get("message"):  # someone sent us a message
                    sender_id = messaging_event["sender"][
                        "id"]  # the facebook ID of the person sending you the message
                    # Hard-coded sender to ignore (acknowledged without reply).
                    if sender_id == u'1774667882802558':
                        log(sender_id)
                        return "ok", 200
                    recipient_id = messaging_event["recipient"][
                        "id"]  # the recipient's ID, which should be your page's facebook ID
                    send_process(sender_id)
                    send_settings()
                    message_text = messaging_event["message"].get("text")
                    option = messaging_event["message"].get("quick_reply")
                    log(option)
                    log(grammarUserID)
                    # Grammar mode: one-shot flag per sender; consume it,
                    # correct the text, and reply immediately.
                    if grammarUserID.get(sender_id) == 1:
                        grammarUserID[sender_id] = 0
                        log(grammarUserID)
                        tool = grammar_check.LanguageTool('en-US')
                        tmp = tool.check(message_text)
                        outp = ''
                        log(tmp)
                        #for mis in tmp :
                        #    outp += mis + '\n'
                        outp = grammar_check.correct(message_text, tmp)
                        send_message(sender_id, "Correct: " + outp)
                        return "ok", 200
                    if option is not None:
                        opt = messaging_event["message"]["quick_reply"][
                            "payload"]
                        log(opt)
                        option_catch(opt, sender_id)
                    else:
                        if option is None:
                            # No quick reply: look for a postback payload.
                            option = messaging_event.get("messaging")
                            if option is not None:
                                opt = option[0]["postback"]["payload"]
                                option_catch(opt, sender_id)
                                return "ok", 200
                        if message_text is not None:
                            if message_text == 'Setting':
                                quick_replies = [["Volcabulary", "Vocab"],
                                                 ["Grammar-Check", "Gramma"],
                                                 ["Category", "Cate"]]
                                send_quickReplies(
                                    sender_id, "Which option do you choose?",
                                    quick_replies)
                            else:
                                send_Define(sender_id, message_text)
                    #else:
                    #messageType = messaging_event["message"].get("text")
                    #if messageType == 'audio':
                    #pass
                    #change to text
                if messaging_event.get("delivery"):  # delivery confirmation
                    pass
                if messaging_event.get("optin"):  # optin confirmation
                    pass
                if messaging_event.get(
                        "postback"
                ):  # user clicked/tapped "postback" button in earlier message
                    pass
    return "ok", 200
import argparse
import grammar_check

parser = argparse.ArgumentParser(description="Text Scoring")
parser.add_argument("--language",
                    help="Language of words, english by default",
                    default="en")
parser.add_argument("--sentence", help="Sentence to score")
args = parser.parse_args()

# Single shared checker for the language chosen on the command line.
tool = grammar_check.LanguageTool(args.language)

def get_scores(text):
    """Checks the sentence for errors, and returns two scores:
    number of errors in the sentence, and the fraction of errors
    from the sentence based on length.

    Fix over the original: an empty *text* returns (0, 0.0) instead of
    raising ZeroDivisionError on ``len(text)``.
    """
    if not text:
        return 0, 0.0
    errors = tool.check(text)
    num_errors = len(errors)
    error_fraction = sum(error.errorlength for error in errors) / len(text)
    return num_errors, error_fraction

def print_scores(text):
    """Print the error count, error fraction, and a 0-100 final score."""
    num_errors, error_fraction = get_scores(text)
    print(
        "Number of errors: {}\nFraction of errors from the text: {}\nFinal score: {}/100"
        .format(num_errors, error_fraction, int((1 - error_fraction) * 100)))

if args.sentence:
    print_scores(args.sentence)
def get_grammar_check_count(review):
    """Return grammar errors per 100 words for *review*.

    Runs LanguageTool (en-GB) over the review and scales the match
    count by the whitespace-word count.

    Fix over the original: an empty or whitespace-only review returns 0
    instead of raising ZeroDivisionError.
    """
    words = review.split()
    if not words:
        # No words to normalise against; define the rate as zero.
        return 0
    tool = grammar_check.LanguageTool('en-GB')
    matches = tool.check(review)
    return 100 * len(matches) / len(words)
def __init__(self):
    """Build the shared language-analysis helpers for this object.

    NOTE(review): ``gc`` here is presumably the grammar-check module
    imported under an alias (it shadows the stdlib ``gc`` name), and
    ``gl`` appears to provide a sentiment-analysis factory — confirm
    both against the file's import block.
    """
    self.grammar_tool = gc.LanguageTool('en-GB')
    self.spell_master = SpellChecker("en_US")
    self.senti_checker = gl.sentiment_analysis.create()