from pylanguagetool import api  # all snippets in this section assume this import


def sentence_check(src):
    # Input a source sentence and correct the first detected error.
    enable_rule_list1 = [
        'COMMA_COMPOUND_SENTENCE', 'EN_QUOTES',
        'SENT_START_CONJUNCTIVE_LINKING_ADVERB_COMMA', 'DOUBLE_PUNCTUATION',
        'COMMA_PARENTHESIS_WHITESPACE', 'DELETE_SPACE',
        'SENTENCE_WHITESPACE', 'DASH_RULE'
    ]
    enable_rule_list2 = [
        'PLURAL_VERB_AFTER_THIS', 'DOES_YOU', 'FEWER_LESS',
        'UPPERCASE_SENTENCE_START', 'EN_A_VS_AN', 'EVERYDAY_EVERY_DAY',
        'CONFUSION_OF_THESES_THESE', 'DO_ARTS', 'WHO_WHOM', 'THIS_NNS',
        'THE_SUPERLATIVE', 'MENTION_ABOUT', 'USE_TO_VERB', 'LOT_OF',
        'MANY_NN', 'A_UNCOUNTABLE', 'DOWN_SIDE', 'HAVE_PART_AGREEMENT',
        'NODT_DOZEN', 'PHRASE_REPETITION', 'ADVISE_VBG',
        'COMPARISONS_AS_ADJECTIVE_AS'
    ]
    # res_dict = api.check(src, api_url='https://languagetool.org/api/v2/', lang='en-US')
    res_dict = api.check(src, api_url='http://localhost:8081/v2/', lang='en-US')
    res_matches = res_dict['matches']
    res_matches = [m for m in res_matches if len(m['replacements']) > 0]
    # res_matches = [m for m in res_matches if (m['rule']['id'] in enable_rule_list1) or (m['rule']['id'] in enable_rule_list2)]
    res_matches = [m for m in res_matches
                   if m['rule']['id'] in enable_rule_list2]  # only use list 2 for generation
    if len(res_matches) == 0:
        return None  # no mistake detected
    match = res_matches[0]
    tmp_from = match['offset']
    tmp_to = tmp_from + match['length']
    tgt = src[:tmp_from] + match['replacements'][0]['value'] + src[tmp_to:]
    return tgt, match['message'], match['rule']['id']
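
# Minimal usage sketch for sentence_check; assumes a LanguageTool server is
# running at http://localhost:8081. The sample sentence is hypothetical (it
# should trigger THIS_NNS from enable_rule_list2).
result = sentence_check("This sentences has a mistake.")
if result is None:
    print("No mistake detected.")
else:
    corrected, message, rule_id = result
    print(f"{rule_id}: {message}\n-> {corrected}")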
def checker(a):
    s = api.check(a, api_url='http://192.168.1.146:8081/v2/', lang='en-US',
                  pwl=['sceneries', 'KFC'])  # pwl: personal word list to ignore
    matches = s["matches"]
    if matches == []:
        return ["No errors found"]
    results = []
    for i in matches:
        # Fall back to a blank suggestion when LanguageTool offers no replacement.
        suggestion = i['replacements'][0]['value'] if i['replacements'] else ' '
        offset = i['offset']
        length = i['length']
        results.append({
            'sentence': i['sentence'],
            'questions': {
                'issueType': i['rule']['issueType'],
                'offset': offset,
                'length': length,
                'words': a[offset:offset + length],
                'suggestion': suggestion,
                'shortMessage': i['shortMessage'],
                'Message': i['message']
            }
        })
    return results
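
# Usage sketch for checker (assumes the LanguageTool server at
# 192.168.1.146:8081 is reachable; the sample sentence is made up).
for item in checker("She go to school every day."):
    if isinstance(item, dict):
        q = item['questions']
        print(f"{q['words']!r} -> {q['suggestion']!r} ({q['Message']})")
    else:
        print(item)  # "No errors found"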
def api_request(text):
    response = api.check(text, "https://languagetool.org/api/v2/", "en-US")
    assert "software" in response  # sanity check: LanguageTool responses carry a 'software' metadata key
    try:
        match = response["matches"][0]
    except IndexError:
        match = "This sentence does not seem to contain any grammatical errors."
    return match
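
# Sketch of handling api_request's two possible return types: a match dict
# when an error is found, or the fallback string when none is.
match = api_request("This is an test")
if isinstance(match, dict):
    print(match['message'])
    print([r['value'] for r in match['replacements'][:3]])
else:
    print(match)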
def readfile():
    # filename = input("Enter name of file: ")
    filename = "memo24.DOCX"
    filepath = "unprocessed/" + filename
    text = textract.process(filepath).decode('UTF-8')
    text = re.sub(regex + regex_blacklist, '', text)  # patterns defined elsewhere in the module
    debug.print_text(text)
    grammar = api.check(
        text,
        api_url='https://languagetool.org/api/v2/',
        lang='en',
        disabled_rules="PROGRESSIVE_VERBS,CONFUSION_RULE,DASH_RULE,ENGLISH_WORD_REPEAT_BEGINNING_RULE,FROM_FORM,NO_SPACE_CLOSING_QUOTE",
        disabled_categories="REDUNDANCY")
    return grammar  # hand the LanguageTool response back to the caller
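
# Sketch of calling readfile and inspecting the matches it returns (assumes
# unprocessed/memo24.DOCX exists and that textract, re, the regex patterns,
# and the debug helper are all importable).
grammar = readfile()
for m in grammar['matches']:
    print(m['rule']['id'], '-', m['message'])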
def speel_checker(word):
    request = api.check(word, api_url='https://languagetool.org/api/v2/', lang='it')
    if len(request['matches']) > 0:  # a grammatical error was found
        list_of_new_values = request['matches'][0]['replacements']
        return list_of_new_values[0]['value']
    else:
        return 0
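
# Quick usage sketch for speel_checker. The misspelled Italian word is a
# hypothetical example; the function returns the first suggested
# replacement, or 0 if LanguageTool finds nothing to fix.
suggestion = speel_checker("bellisimo")
if suggestion:
    print("Did you mean:", suggestion)
else:
    print("No issues found.")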
def predict_grammar_score(corpus: Corpus, name: str, grammar_penalty: float):
    for d in tqdm(corpus.documents):
        for s in d.gen_summaries:
            n_issues = len(api.check(s.text,
                                     api_url='https://languagetool.org/api/v2/',
                                     lang='de')["matches"])
            text_len = len(s.text)
            s.predicted_scores[name] = max(
                0, 1 - grammar_penalty * n_issues / np.log(text_len))
    return corpus
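
# The scoring rule is easy to sanity-check in isolation. A factored-out
# sketch of the same formula; the penalty value 0.1 is an assumption, not
# taken from the original.
import numpy as np

def grammar_score(n_issues: int, text_len: int, grammar_penalty: float = 0.1) -> float:
    # Subtract a penalty proportional to the issue count, normalized by the
    # log of the text length so long texts are not over-penalized; clamp at 0.
    return max(0, 1 - grammar_penalty * n_issues / np.log(text_len))

print(grammar_score(0, 500))   # 1.0 (no issues)
print(grammar_score(10, 500))  # ~0.84 with the assumed penalty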
def run(args):
    # For rules and rule ids see https://community.languagetool.org/rule/list?lang=en
    lines = get_lines(args.input_path)
    if args.strip:
        lines = strip_lines(lines)
    print(lines)
    results = api.check(
        input_text=lines,
        api_url='http://localhost:8081/v2/',
        lang='en',
        disabled_rules='UPPERCASE_SENTENCE_START,I_LOWERCASE,ENGLISH_WORD_REPEAT_BEGINNING_RULE,EN_COMPOUNDS,COMMA_PARENTHESIS_WHITESPACE',
        pwl=['UNK']
    )
    print('grammatical errors:', len(results['matches']))
    with open(args.output_path, 'w') as f:
        json.dump(results, f)
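
# run expects an argparse-style namespace; a hypothetical invocation with
# placeholder paths (a LanguageTool server must be listening on
# http://localhost:8081, and get_lines/strip_lines must be defined).
from types import SimpleNamespace

args = SimpleNamespace(input_path='input.txt',
                       output_path='report.json',
                       strip=True)
run(args)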
def correct_text(text):
    result = api.check(text, lang='en-US', api_url=LANGUAGE_TOOL_URL)
    matches = sorted(result['matches'], key=lambda x: x['offset'])
    chunks = []
    cursor = 0
    for match in matches:
        replacements = match['replacements']
        if not replacements:
            continue
        rep_value = replacements[0]['value']
        offset = match['offset']
        length = match['length']
        chunks.append(text[cursor:offset])
        chunks.append(rep_value)
        cursor = offset + length
    if cursor < len(text):
        chunks.append(text[cursor:])
    return ''.join(chunks)
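
# Because corrections are spliced left to right against the original string
# (building chunks instead of mutating text in place), no offset bookkeeping
# is needed. Usage sketch; the endpoint value is an assumption.
LANGUAGE_TOOL_URL = 'http://localhost:8081/v2/'
print(correct_text("This are a example sentence."))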
def index(request):
    """
    Main view of the project. It performs the following steps:

    1) Authenticates the user; if the user is not valid, redirects to the login page.
    2) Takes the user query to be processed.
    3) Calls the grammar-correction API and fetches the result as JSON.
    4) Uses the API result to highlight errors, builds a dictionary of
       error data, and returns it to the HTML page for rendering.

    .. note::
       The API output is a list of dictionaries, where each dictionary
       contains the error details and how to correct the error.

       Attributes of each dictionary:

       - offset: position of the error in the user input string.
       - length: length of the error (starting from offset).
       - message: which type of error was found.
       - replacement: string to replace the error with.

    :return: dictionary containing error details
    :rtype: dict
    """
    if request.method == 'GET':
        if request.user.is_authenticated:
            return render(request, 'index.html')
        else:
            return redirect("http://127.0.0.1:8000/accounts/login")
    else:
        if (request.POST.get('email') is not None
                and request.POST.get('pass') is not None):
            username = request.POST.get('email')
            password = request.POST.get('pass')
            user = authenticate(username=username, password=password)
            if user is not None:
                login(request, user)
                return render(request, 'index.html')
            else:
                return render(request, 'registration/login.html')
        else:
            query = request.POST.get('hid', None)
            query = query.capitalize()
            hquery = query
            hquery = hquery.replace('<br>', "\n")
            hquery = hquery.replace('&nbsp;', '')
            fetch = api.check(query,
                              api_url='https://languagetool.org/api/v2/',
                              lang='en-US')
            hcurrentText = hquery
            errorlist = []
            message = []
            details = []
            errorHtml = []
            c = 0
            delta = 0
            for errors in fetch['matches']:
                internalDict = {}
                internalDict['offset'] = errors['offset']
                internalDict['length'] = errors['length']
                internalDict['text'] = errors['context']['text']
                internalDict['message'] = errors['message']
                internalDict['shortMessage'] = errors['shortMessage']
                message.append(internalDict['shortMessage'])
                errorHtml.append(query[internalDict['offset']:
                                       internalDict['offset'] + internalDict['length']])
                details.append(internalDict['message'])
                internalDict['replacement'] = []
                limit = min(len(errors['replacements']), 7)  # cap suggestions at 7
                for i in errors['replacements'][:limit]:
                    internalDict['replacement'].append(i['value'])
                errorlist.append(internalDict)
            for errorIndex in range(len(errorlist)):
                addFirst = ("<span style='background-color: rgb(255, 153, 171); "
                            "padding:3px;' id='" + str(c) + "' name='replacePosition'>")
                addLast = "</span>"
                offset = errorlist[errorIndex]['offset'] + delta
                length = errorlist[errorIndex]['length']
                hcurrentText = (hcurrentText[:offset] + addFirst +
                                hcurrentText[offset:offset + length] + addLast +
                                hcurrentText[offset + length:])
                delta = delta + len(addFirst + addLast)
                c = c + 1
            res = hcurrentText
            replacements = [i['replacement'] for i in errorlist]
            return render(request, 'index.html', {
                'result': res,
                'sug': replacements,
                'details': details,
                'brief': message,
                'length': range(len(replacements)),
                'errorHtml': errorHtml
            })
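
# The offset arithmetic in the highlighting loop is the subtle part: every
# inserted tag pair shifts later errors to the right, which delta accounts
# for. A factored-out sketch of just that logic (the function name and the
# sample input are hypothetical).
def highlight_errors(text, errors):
    # errors: dicts with 'offset' and 'length', sorted by offset.
    delta = 0
    for i, err in enumerate(errors):
        open_tag = ("<span style='background-color: rgb(255, 153, 171); "
                    f"padding:3px;' id='{i}' name='replacePosition'>")
        close_tag = "</span>"
        start = err['offset'] + delta
        end = start + err['length']
        text = text[:start] + open_tag + text[start:end] + close_tag + text[end:]
        delta += len(open_tag) + len(close_tag)
    return text

print(highlight_errors("helo world", [{'offset': 0, 'length': 4}]))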
def count_grammatical_errors(text):
    grammar = api.check(
        text,
        api_url='https://languagetool.org/api/v2/',
        lang='en',
        disabled_rules="PROGRESSIVE_VERBS,CONFUSION_RULE,DASH_RULE,ENGLISH_WORD_REPEAT_BEGINNING_RULE,FROM_FORM,NO_SPACE_CLOSING_QUOTE",
        disabled_categories="REDUNDANCY")
    return len(grammar['matches'])  # matches remaining after the disabled rules are filtered out
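
# Usage sketch for count_grammatical_errors; the input text is hypothetical.
n = count_grammatical_errors("He have went to the store yesterday.")
print(f"{n} grammatical issue(s) found")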
def correct_content(self, content, language):
    # TODO to be moved to LT processes class
    # Segments the content and sends it to LanguageTool in chunks that
    # respect the public API rate limits:
    # http://wiki.languagetool.org/public-http-api
    if os.path.isfile(self.outpath):
        msg = 'title exists in cache: %s' % self.title
        print(self.outpath)
        print(msg)
        logging.info(msg)
        with open(self.outpath) as f:
            responses = json.load(f)
        return responses
    responses = {'title': self.title, 'results': []}
    if self.online:
        per_req_size_limit = 6e3  # bytes (~6 KB per request)
        sentences = content.split('. ')
        requests = []
        test_chunks = []
        chunk = []
        for sentence in sentences:
            chunk.append(sentence)
            total_chunk = '. '.join(chunk)
            if sys.getsizeof(total_chunk) > per_req_size_limit:
                requests.append(total_chunk)
                test_chunks.append((chunk[0], chunk[-1]))
                chunk = []
        if chunk:  # add the last, possibly undersized, chunk
            requests.append('. '.join(chunk))
            test_chunks.append((chunk[0], chunk[-1]))
        # Send the requests to the API.
        # TODO smarter rate limit control needed
        total_requests = len(requests)
        for i, request in enumerate(requests):
            try:
                response = api.check(request, api_url=self.languagetool,
                                     lang=language)
                # TODO check language; if confidence is lower than 0.90, resend
            except Exception:
                msg = "%s language error. Trying to detect the language." % language
                logging.warning(msg)
                # Detect the language from the first and last sentences of
                # the chunk, then retry with the detection that disagrees
                # with the language tried so far.
                response = api.check(test_chunks[i][1], api_url=self.languagetool,
                                     lang=language)
                language_bottom = response['language']['detectedLanguage']['code']
                response = api.check(test_chunks[i][0], api_url=self.languagetool,
                                     lang=language_bottom)
                language_top = response['language']['detectedLanguage']['code']
                if language != language_top:
                    language = language_top
                else:
                    language = language_bottom
                msg = "%s detected as new language" % language
                logging.info(msg)
                response = api.check(request, api_url=self.languagetool,
                                     lang=language)
            message = '%i/%i response sent' % (i + 1, total_requests)
            print(message)
            logging.info(message)
            if i + 1 != total_requests:
                # Wait between all LT API calls except after the last one.
                time.sleep(4)
            responses['results'].append({'content': request,
                                         'response': response})
    else:
        chunks = corrector.get_chunks(content)
        corrector.correct(chunks, responses)
    with open(self.outpath, 'w') as out:
        json.dump(responses, out, indent=2)
    return responses
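
# The greedy sentence-packing step is the reusable part of correct_content.
# A standalone sketch of the same logic (the function name is hypothetical;
# like the original, it measures size with sys.getsizeof).
import sys

def chunk_sentences(content, limit_bytes=6000):
    # Pack '. '-separated sentences into chunks that stay under the public
    # API's per-request size limit.
    chunks, current = [], []
    for sentence in content.split('. '):
        current.append(sentence)
        if sys.getsizeof('. '.join(current)) > limit_bytes:
            chunks.append('. '.join(current))
            current = []
    if current:  # keep the final, possibly undersized, chunk
        chunks.append('. '.join(current))
    return chunks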
def test_request():
    response = api.check("This is an test", API_BASE_URL, "auto")
    assert "software" in response
    match = response["matches"][0]
    assert isinstance(match, dict)
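
# A companion test sketch (assumes API_BASE_URL points at a reachable
# server): a clean sentence should come back with no matches.
def test_request_no_errors():
    response = api.check("This is a test.", API_BASE_URL, "en-US")
    assert response["matches"] == []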
def textCheck():
    check = api.check('helo world', 'https://languagetool.org/api/v2/', 'en-US')
    # document = get_model().read(id)
    return render_template("text-check.html", responseText=check)
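
# textCheck is a bare view function; a hypothetical wiring sketch showing
# how it could be registered in a Flask app (the route path and app object
# are assumptions, not taken from the original).
from flask import Flask, render_template
from pylanguagetool import api

app = Flask(__name__)
app.add_url_rule('/text-check', view_func=textCheck)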
def index():
    global login
    if not login:
        return redirect(url_for('login'))
    cur = mysql.connection.cursor()
    # cur.execute("""DROP TABLE IF EXISTS Feedback_Doc;""")
    # cur.execute("""DROP TABLE IF EXISTS Feedback_Sentence;""")
    # cur.execute("""DROP TABLE IF EXISTS Input;""")
    cur.execute("""CREATE TABLE IF NOT EXISTS Input (
                       input_id INTEGER PRIMARY KEY AUTO_INCREMENT,
                       user_id TEXT,
                       message TEXT,
                       time_stamp TEXT
                   )""")
    cur.execute("""CREATE TABLE IF NOT EXISTS Feedback_Doc (
                       input_id INTEGER PRIMARY KEY,
                       word_count INTEGER,
                       label TEXT,
                       impoliteness_score REAL,
                       politeness_score REAL,
                       FOREIGN KEY (input_id) REFERENCES Input(input_id)
                   )""")
    cur.execute("""CREATE TABLE IF NOT EXISTS Feedback_Sentence (
                       id INTEGER PRIMARY KEY AUTO_INCREMENT,
                       input_id INTEGER,
                       sentence_content TEXT,
                       label TEXT,
                       impoliteness_score REAL,
                       politeness_score REAL,
                       strategy_count INTEGER,
                       strategies VARCHAR(255),
                       indices VARCHAR(255),
                       FOREIGN KEY (input_id) REFERENCES Input(input_id)
                   )""")
    label_string = ""
    input_text = ""
    title = ""
    strategies_set = set()
    highlight_index_set = set()
    strategies = []
    strategies_all = []
    if request.method == 'POST':
        title = request.form['theme']
        input_text = request.form['sentence']

        # Check for grammatical mistakes.
        grammar_check = api.check(input_text,
                                  api_url='https://languagetool.org/api/v2/',
                                  lang='en-US')
        grammar_messages = grammar_check['matches']
        grammar_corrections, split_input, wrong_words, impolite_words, replacements = [], [], [], [], {}
        if len(grammar_messages) != 0:
            for i in range(len(grammar_messages)):
                # og_msg = grammar_messages[i]['context']['text']
                og_msg = input_text
                offset = grammar_messages[i]['offset']
                grammar_corrections.append(grammar_messages[i]['message'])
                wrong_words.append(og_msg[offset:offset + grammar_messages[i]['length']])
                for repl in grammar_messages[i]['replacements']:
                    if i not in replacements:
                        replacements[i] = [repl['value']]
                    else:
                        replacements[i].append(repl['value'])
            split_input = input_text.split()  # NEEDS TO BE CHANGED LATER...
        num_corrections = str(len(replacements))
        print(wrong_words)
        print(replacements)

        # Get the politeness score for the overall document.
        doc_res = score_text(input_text)
        print("DOCUMENT POLITENESS:\n", doc_res)
        label_string = doc_res[0]

        # Get the politeness score for each sentence in the document.
        sentence_list = nltk.sent_tokenize(input_text)
        sent_politeness_res = list()
        impolite_sentence_indices = dict()
        for i, sentence in enumerate(sentence_list):
            # Politeness score.
            res = score_text(sentence)
            label, impolite_score, polite_score = res[0], res[1], res[2]
            # Strategies feedback.
            doc = PolitenessFeatureVectorizer.preprocess([sentence])[0]
            strategies = get_feedback(doc)
            for strat in strategies:
                strategies_set.add(strat[0])
                highlight_index_set.add(strat[1][0])
            sent_politeness_res.append(
                (sentence, label, impolite_score, polite_score, strategies))
        print("PER SENTENCE POLITENESS\n", sent_politeness_res)
        strategies_all = sent_politeness_res[0][4]

        now = datetime.datetime.now().strftime("%b %d %Y %H:%M:%S")
        cur.execute("INSERT INTO Input (user_id, message, time_stamp) VALUES (%s, %s, %s)",
                    (g.user, input_text, now))
        cur.execute("SELECT input_id FROM Input WHERE time_stamp = %s", (now,))
        input_id = cur.fetchone()[0]
        cur.execute("INSERT INTO Feedback_Doc (input_id, word_count, label, impoliteness_score, politeness_score) VALUES (%s, %s, %s, %s, %s)",
                    (input_id, len(input_text.split()), doc_res[0],
                     float(doc_res[1]), float(doc_res[2])))
        for m in sent_politeness_res:
            strategies = [i[0] for i in m[4]]
            strategies_idx = [i[1] for i in m[4]]
            cur.execute(
                "INSERT INTO Feedback_Sentence (input_id, sentence_content, label, impoliteness_score, politeness_score, strategy_count, strategies, indices) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)",
                (input_id, m[0], m[1], float(m[2]), float(m[3]), len(m[4]),
                 str(strategies), str(strategies_idx)))
        mysql.connection.commit()
        print(strategies)

    cur.close()
    return render_template('new_feedback.html',
                           label_string=label_string,
                           user_input=input_text,
                           title=title,
                           strategies_list=strategies_set,
                           strategies=strategies_all,
                           highlight_index=highlight_index_set)