예제 #1
0
def sentence_check(src):
    """Run LanguageTool on *src* and correct the first applicable error.

    Returns None when no enabled rule matches; otherwise a tuple of
    (corrected sentence, match message, rule id).
    """

    enable_rule_list1 = [
        'COMMA_COMPOUND_SENTENCE', 'EN_QUOTES',
        'SENT_START_CONJUNCTIVE_LINKING_ADVERB_COMMA', 'DOUBLE_PUNCTUATION',
        'COMMA_PARENTHESIS_WHITESPACE', 'DELETE_SPACE', 'SENTENCE_WHITESPACE',
        'DASH_RULE'
    ]
    enable_rule_list2 = [
        'PLURAL_VERB_AFTER_THIS', 'DOES_YOU', 'FEWER_LESS',
        'UPPERCASE_SENTENCE_START', 'EN_A_VS_AN', 'EVERYDAY_EVERY_DAY',
        'CONFUSION_OF_THESES_THESE', 'DO_ARTS', 'WHO_WHOM', 'THIS_NNS',
        'THE_SUPERLATIVE', 'MENTION_ABOUT', 'USE_TO_VERB', 'LOT_OF', 'MANY_NN',
        'A_UNCOUNTABLE', 'DOWN_SIDE', 'HAVE_PART_AGREEMENT', 'NODT_DOZEN',
        'PHRASE_REPETITION', 'ADVISE_VBG', 'COMPARISONS_AS_ADJECTIVE_AS'
    ]

    res_dict = api.check(src,
                         api_url='http://localhost:8081/v2/',
                         lang='en-US')
    # keep only matches that carry a suggestion and whose rule id is enabled;
    # only list 2 is used for generation
    candidates = [
        m for m in res_dict['matches']
        if m['replacements'] and m['rule']['id'] in enable_rule_list2
    ]
    if not candidates:
        return None  # no mistake detected
    first = candidates[0]
    start = first['offset']
    end = start + first['length']
    tgt = src[:start] + first['replacements'][0]['value'] + src[end:]
    return tgt, first['message'], first['rule']['id']
예제 #2
0
def checker(a):
    """Check text *a* with LanguageTool and summarise every match.

    :param a: the text to check.
    :return: ``["未发现错误"]`` when the text is clean, otherwise a list of
        per-match dicts of the form
        ``{'sentence': ..., 'questions': {...error details...}}``.
    """
    result = api.check(a,
                       api_url='http://192.168.1.146:8081/v2/',
                       lang='en-US',
                       pwl=['sceneries', 'KFC'])
    matches = result["matches"]
    if not matches:
        return ["未发现错误"]  # no errors found

    # The original code had two near-identical 20-line branches differing
    # only in the suggestion value; they are folded into one loop here.
    report = []
    for m in matches:
        offset = m['offset']
        length = m['length']
        # fall back to a single space when LanguageTool offers no replacement
        if m['replacements']:
            suggestion = m['replacements'][0]['value']
        else:
            suggestion = ' '
        report.append({
            'sentence': m['sentence'],
            'questions': {
                'issueType': m['rule']['issueType'],
                'offset': offset,
                'length': length,
                'words': a[offset:offset + length],
                'suggestion': suggestion,
                'shortMessage': m['shortMessage'],
                'Message': m['message']
            }
        })
    return report
예제 #3
0
def api_request(text):
    """Query the public LanguageTool API and return the first match.

    Falls back to a friendly message when no match is reported.
    """
    response = api.check(text, "https://languagetool.org/api/v2/", "en-US")
    assert "software" in response
    matches = response["matches"]
    if matches:
        return matches[0]
    return "This sentence does not seem to contain any grammatical errors."
예제 #4
0
def readfile():
    """Extract text from a fixed DOCX file, clean it, and grammar-check it."""
    filename = "memo24.DOCX"
    filepath = "unprocessed/" + filename
    raw = textract.process(filepath)
    text = raw.decode('UTF-8')
    # strip blacklisted patterns before checking
    text = re.sub(regex + regex_blacklist, '', text)
    debug.print_text(text)
    grammar = api.check(
        text,
        api_url='https://languagetool.org/api/v2/',
        lang='en',
        disabled_rules="PROGRESSIVE_VERBS,CONFUSION_RULE,DASH_RULE,ENGLISH_WORD_REPEAT_BEGINNING_RULE,FROM_FORM,NO_SPACE_CLOSING_QUOTE",
        disabled_categories="REDUNDANCY")
예제 #5
0
def speel_checker(word):
    """Return the top LanguageTool (Italian) suggestion for *word*, or 0."""
    response = api.check(word,
                         api_url='https://languagetool.org/api/v2/',
                         lang='it')
    matches = response['matches']
    if not matches:
        return 0
    # a grammatical error was found: return the first suggested value
    return matches[0]['replacements'][0]['value']
def predict_grammar_score(corpus: Corpus, name: str, grammar_penalty: float):
    """Score each generated summary by penalising LanguageTool issues.

    Stores ``max(0, 1 - grammar_penalty * issues / log(len(text)))`` under
    ``predicted_scores[name]`` for every summary, then returns the corpus.
    """
    for document in tqdm(corpus.documents):
        for summary in document.gen_summaries:
            response = api.check(summary.text,
                                 api_url='https://languagetool.org/api/v2/',
                                 lang='de')
            issue_count = len(response["matches"])
            score = 1 - grammar_penalty * issue_count / np.log(len(summary.text))
            summary.predicted_scores[name] = max(0, score)
    return corpus
def run(args):
    """Grammar-check the input file and dump the LanguageTool result as JSON."""
    # for rules and rule ids see https://community.languagetool.org/rule/list?lang=en
    lines = get_lines(args.input_path)
    if args.strip:
        lines = strip_lines(lines)
    print(lines)
    results = api.check(
        input_text=lines,
        api_url='http://localhost:8081/v2/',
        lang='en',
        disabled_rules='UPPERCASE_SENTENCE_START,I_LOWERCASE,ENGLISH_WORD_REPEAT_BEGINNING_RULE,EN_COMPOUNDS,COMMA_PARENTHESIS_WHITESPACE',
        pwl=['UNK']
    )

    error_count = len(results['matches'])
    print('grammatical errors:', error_count)
    with open(args.output_path, 'w') as out_file:
        json.dump(results, out_file)
예제 #8
0
def correct_text(text):
    """Apply the first suggestion of every LanguageTool match to *text*.

    Matches are processed in offset order; matches with no suggested
    replacement are left untouched.
    """
    result = api.check(text, lang='en-US', api_url=LANGUAGE_TOOL_URL)
    ordered = sorted(result['matches'], key=lambda m: m['offset'])
    pieces = []
    pos = 0
    for m in ordered:
        if not m['replacements']:
            continue  # nothing to substitute for this match
        start = m['offset']
        pieces.append(text[pos:start])
        pieces.append(m['replacements'][0]['value'])
        pos = start + m['length']
    if pos < len(text):
        pieces.append(text[pos:])
    return ''.join(pieces)
예제 #9
0
def index(request):
    """
    Main view of the grammar-correction project.

    Workflow:

    1) Authenticates the user; unauthenticated GET requests are redirected
       to the login page.
    2) Reads the user's query text from the POST payload.
    3) Calls the LanguageTool grammar-correction API (JSON response).
    4) Highlights each error span in the text and collects error details
       for rendering in ``index.html``.

    Each entry of the error list is a dict with:

    - offset: position of the error in the input string.
    - length: length of the error (starting from offset).
    - message / shortMessage: description of the error.
    - replacement: up to 7 suggested corrections.

    :return: rendered page (or redirect) containing error details
    :rtype: django.http.HttpResponse
    """

    if request.method == 'GET':
        if request.user.is_authenticated:
            return render(request, 'index.html')
        else:
            return redirect("http://127.0.0.1:8000/accounts/login")
    else:
        # POST with credentials present -> manual login flow
        if (request.POST.get('email') is not None
                and request.POST.get('pass') is not None):
            username = request.POST.get('email')
            password = request.POST.get('pass')
            print(username)
            print(password)
            user = authenticate(username=username, password=password)
            print(user)
            if (user is not None):
                login(request, user)
                return render(request, 'index.html')
            else:
                return render(request, 'registration/login.html')
        else:
            # POST with text to check (hidden form field 'hid')
            query = request.POST.get('hid', None)
            query = query.capitalize()
            hquery = query
            print(hquery)
            # strip the HTML artifacts inserted by the input widget
            hquery = hquery.replace('<br>', "\n")
            hquery = hquery.replace('&nbsp;', '')
            print(hquery)
            fetch = api.check(query,
                              api_url='https://languagetool.org/api/v2/',
                              lang='en-US')
            # NOTE(review): match offsets are computed against `query` but the
            # highlighting below is applied to the stripped `hquery` — confirm
            # the two stay aligned when '<br>'/'&nbsp;' were actually removed.
            hcurrentText = hquery
            errorlist = []
            message = []
            details = []
            errorHtml = []
            c = 0  # running error index, used as the <span> element id
            delta = 0  # cumulative length of HTML inserted so far
            for errors in fetch['matches']:
                # collect the details of one LanguageTool match
                internalDict = {}
                internalDict['offset'] = errors['offset']
                internalDict['length'] = errors['length']
                internalDict['text'] = errors['context']['text']
                internalDict['message'] = errors['message']
                internalDict['shortMessage'] = errors['shortMessage']
                message.append(internalDict['shortMessage'])
                errorHtml.append(
                    query[internalDict['offset']:internalDict['offset'] +
                          internalDict['length']])
                details.append(internalDict['message'])
                internalDict['replacement'] = []
                # keep at most 7 suggestions per error
                limit = len(errors['replacements'])
                if limit > 7:
                    limit = 7
                for i in errors['replacements'][:limit]:
                    internalDict['replacement'].append(i['value'])
                errorlist.append(internalDict)
                #print(internalDict["replacement"])

            # wrap every error span in a highlight <span>; delta tracks how
            # much earlier insertions have shifted the later offsets
            for errorIndex in range(len(errorlist)):
                addFirst = "<span style='background-color: rgb(255, 153, 171); padding:3px;' id='" + str(
                    c) + "' name='replacePosition'>"
                addLast = "</span>"
                offset = errorlist[errorIndex]['offset'] + delta
                length = errorlist[errorIndex]['length']
                hcurrentText = hcurrentText[:offset] + addFirst + hcurrentText[
                    offset:offset + length] + addLast + hcurrentText[offset +
                                                                     length:]
                delta = delta + len(addFirst + addLast)
                c = c + 1
            res = hcurrentText
            replacements = []
            for i in errorlist:
                replacements.append(i['replacement'])
            return render(
                request, 'index.html', {
                    'result': res,
                    'sug': replacements,
                    'details': details,
                    'brief': message,
                    'length': range(len(replacements)),
                    'errorHtml': errorHtml
                })
예제 #10
0
def count_grammatical_errors(text):
    """Return the number of LanguageTool matches found in *text*.

    Several stylistic rules and the REDUNDANCY category are disabled so
    only substantive grammar issues are counted.
    """
    grammar = api.check(text, api_url='https://languagetool.org/api/v2/', lang='en',
                        disabled_rules="PROGRESSIVE_VERBS,CONFUSION_RULE,DASH_RULE,ENGLISH_WORD_REPEAT_BEGINNING_RULE,FROM_FORM,NO_SPACE_CLOSING_QUOTE",
                        disabled_categories="REDUNDANCY")
    # The original ran the check but returned nothing; the function name
    # promises a count, so return it (None -> int is safe for callers that
    # ignored the result).
    return len(grammar['matches'])
예제 #11
0
    def correct_content(self, content, language):
        """Send *content* to LanguageTool in size-limited chunks.

        Splits the content on '. ' boundaries, groups the sentences into
        chunks bounded by the public API's per-request size limit, sends
        each chunk (retrying with a detected language on failure), caches
        the combined result as JSON at ``self.outpath``, and returns it.
        """
        # TODO to be moved to LT processes class
        # Segments and sends the content to LT according to the
        # public api rate limits
        # http://wiki.languagetool.org/public-http-api

        if os.path.isfile(self.outpath):
            # cached result exists on disk; reuse it instead of calling the API
            msg = 'title exists in cache: %s'%self.title
            print(self.outpath)
            print(msg)
            logging.info(msg)
            with open(self.outpath) as f:
                responses = json.load(f)
            return responses
        else:
            responses = {'title': self.title, 'results': []}
            if self.online:
                per_req_size_limit = 6e3 # KB
                sentences = content.split('. ')
                requests = []
                test_chunks = []  # (first, last) sentence of each chunk, for language probing
                chunk = []
                for sentence in sentences:
                    chunk.append(sentence)
                    total_chunk = '. '.join(chunk)
                    # NOTE(review): a chunk is emitted only after it already
                    # exceeds the limit, so individual requests can be slightly
                    # larger than per_req_size_limit — confirm the API accepts this.
                    if sys.getsizeof(total_chunk) > per_req_size_limit:
                        requests.append(total_chunk)
                        test_chunks.append((chunk[0], chunk[-1]))
                        chunk = []
                if chunk:
                    # add last chunk
                    requests.append('. '.join(chunk))
                    test_chunks.append((chunk[0], chunk[-1]))

                # send requests to api
                # TODO smarter rate limit control needed
                total_requests = len(requests)
                for i, request in enumerate(requests):
                    try:
                        response = api.check(request,
                                         api_url=self.languagetool,
                                         lang=language)
                    # TODO check language, if confidence lower than 0.90 resend
                    except Exception as e:
                        # On failure, probe the chunk's last and first sentence
                        # to auto-detect the language, then resend the whole chunk.
                        msg = "%s language error. Trying to detect the language."\
                              ""%language
                        logging.warning(msg)
                        response = api.check(test_chunks[i][1],
                                         api_url=self.languagetool,
                                         lang=language)
                        language_bottom = response['language']['detectedLanguage']['code']
                        response = api.check(test_chunks[i][0],
                                         api_url=self.languagetool,
                                         lang=language_bottom)
                        language_top = response['language']['detectedLanguage']['code']
                        # prefer the detection that disagrees with the failing language
                        if language != language_top:
                            language = language_top
                        else:
                            language = language_bottom
                        msg = "%s detected as new language"%language
                        logging.info(msg)
                        response = api.check(request,
                                         api_url=self.languagetool,
                                         lang=language)
                    message = '%i/%i response sent'%(i+1, total_requests)
                    print(message)
                    logging.info(message)
                    if i+1 != total_requests:
                        # wait at all except the last LT api call
                        time.sleep(4)
                    responses['results'].append({'content': request,
                                                   'response': response})
            else:
                # offline path: delegate to the local corrector
                chunks = corrector.get_chunks(content)
                corrector.correct(chunks, responses)

            with open(self.outpath, 'w') as out:
                json.dump(responses, out, indent = 2)
            return responses
예제 #12
0
def test_request():
    """Smoke-test the check endpoint with automatic language detection."""
    response = api.check("This is an test", API_BASE_URL, "auto")
    assert "software" in response
    first_match = response["matches"][0]
    assert isinstance(first_match, dict)
예제 #13
0
def textCheck():
    """Render the grammar-check page for a fixed sample string."""
    result = api.check('helo world', 'https://languagetool.org/api/v2/',
                       'en-US')
    return render_template("text-check.html", responseText=result)
예제 #14
0
def index():
    """Main Flask view: grammar check plus politeness feedback.

    GET renders the feedback form (after login). POST runs a LanguageTool
    grammar check and a politeness classifier over the submitted text,
    persists the results to MySQL, and renders the feedback page.
    """
    global login
    if not login:
        return redirect(url_for('login'))

    cur = mysql.connection.cursor()
    # cur.execute("""DROP TABLE IF EXISTS Feedback_Doc;""")
    # cur.execute("""DROP TABLE IF EXISTS Feedback_Sentence;""")
    # cur.execute("""DROP TABLE IF EXISTS Input;""")
    # ensure the three feedback tables exist before any insert
    cur.execute("""CREATE TABLE IF NOT EXISTS Input (
                input_id INTEGER PRIMARY KEY AUTO_INCREMENT,
                user_id TEXT,
                message TEXT,
                time_stamp TEXT
                )""")
    cur.execute("""CREATE TABLE IF NOT EXISTS Feedback_Doc (
                input_id INTEGER PRIMARY KEY,
                word_count INTEGER,
                label TEXT,
                impoliteness_score REAL,
                politeness_score REAL,
                FOREIGN KEY (input_id) REFERENCES Input(input_id)
                )""")
    cur.execute("""CREATE TABLE IF NOT EXISTS Feedback_Sentence (
                id INTEGER PRIMARY KEY AUTO_INCREMENT,
                input_id INTEGER,
                sentence_content TEXT,
                label TEXT,
                impoliteness_score REAL,
                politeness_score REAL,
                strategy_count INTEGER,
                strategies VARCHAR(255),
                indices VARCHAR(255),
                FOREIGN KEY (input_id) REFERENCES Input(input_id)
                )""")
    label_string = ""
    input_text = ""
    title = ""
    strategies_set = set()
    highlight_index_set = set()
    strategies = []
    strategies_all = []
    if request.method == 'POST':
        title = request.form['theme']
        input_text = request.form['sentence']

        # check for grammatical mistakes
        grammar_check = api.check(input_text, api_url='https://languagetool.org/api/v2/', lang='en-US')
        grammar_messages = grammar_check['matches']
        grammar_corrections, split_input, wrong_words, impolite_words, replacements = [], [], [], [], {}
        if len(grammar_messages) != 0:
            for i in range(len(grammar_messages)):
                # og_msg = grammar_messages[i]['context']['text']
                og_msg = input_text
                offset = grammar_messages[i]['offset']
                grammar_corrections.append(grammar_messages[i]['message'])
                wrong_words.append(og_msg[offset:offset+grammar_messages[i]['length']])
                # group all suggested replacement values by match index
                for repl in grammar_messages[i]['replacements']:
                    if i not in replacements:
                        replacements[i] = [repl['value']]
                    else:
                        replacements[i].append(repl['value'])
        split_input = input_text.split()
        ### NEEDS TO BE CHANGED LATER...
        num_corrections = str(len(replacements))
        print(wrong_words)
        print(replacements)


        # Get politeness score for overall document
        doc_res = score_text(input_text)
        print("DOCUMENT POLITENESS:\n", doc_res)
        label_string = doc_res[0]

        # Get politeness score for each sentence in document
        sentence_list = nltk.sent_tokenize(input_text)
        sent_politeness_res = list()
        impolite_sentence_indices = dict()
        for i, sentence in enumerate(sentence_list):
            ## politeness score
            res = score_text(sentence)
            label, impolite_score, polite_score = res[0], res[1], res[2]

            ## strategies feedback
            doc = PolitenessFeatureVectorizer.preprocess([sentence])[0]
            strategies = get_feedback(doc)
            for strat in strategies:
                strategies_set.add(strat[0])
                highlight_index_set.add(strat[1][0])
            sent_politeness_res.append( (sentence, label, impolite_score, polite_score, strategies) )

        print("PER SENTENCE POLITENESS\n", sent_politeness_res)
        # print(sent_politeness_res[0][4])
        # print(len(sent_politeness_res[0][4]))
        # NOTE(review): only the FIRST sentence's strategies reach the template
        # — confirm this is intended rather than an aggregation over sentences.
        strategies_all = sent_politeness_res[0][4]

        now = datetime.datetime.now().strftime("%b %d %Y %H:%M:%S")
        cur.execute("INSERT INTO Input (user_id, message, time_stamp) VALUES (%s, %s, %s)", (g.user, input_text, now))
        # recover the auto-increment id by looking the row back up via timestamp
        # NOTE(review): second-resolution timestamps can collide under load —
        # cur.lastrowid would be safer; verify against deployment assumptions.
        cur.execute("SELECT input_id FROM Input WHERE time_stamp = %s", (now,))
        input_id = cur.fetchone()[0]

        cur.execute("INSERT INTO Feedback_Doc (input_id, word_count, label, impoliteness_score, politeness_score) VALUES (%s, %s, %s, %s, %s)",
                    (input_id, len(input_text.split()), doc_res[0], float(doc_res[1]), float(doc_res[2])))

        # one Feedback_Sentence row per analysed sentence
        for m in sent_politeness_res:
            strategies = [i[0] for i in m[4]]
            strategies_idx = [i[1] for i in m[4]]
            cur.execute(
                "INSERT INTO Feedback_Sentence (input_id, sentence_content, label, impoliteness_score, politeness_score, strategy_count, strategies, indices) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)",
                (input_id, m[0], m[1], float(m[2]), float(m[3]), len(m[4]), str(strategies), str(strategies_idx)))

        mysql.connection.commit()
        print(strategies)
        # cur.execute("""SELECT * FROM Input""")
        # print(cur.fetchall(), '\n')
        # cur.execute("""SELECT * FROM Feedback_Doc""")
        # print(cur.fetchall(), '\n')
        # cur.execute("""SELECT * FROM Feedback_Sentence""")
        # print(cur.fetchall(), '\n')
        cur.close()
        original_text = input_text
        # return render_template('feedback.html', user_input=input_text, label_string=label_string, impoliteness_score=impoliteness_score, politeness_score=politeness_score, strategies=strategies, grammar_msg=grammar_corrections, repl=replacements, split_inputs=split_input, num_errors=num_corrections, mistakes=wrong_words, impolite_ind=impolite_indices, impolite_words=impolite_words)
    return render_template('new_feedback.html',label_string = label_string, user_input = input_text, title = title,strategies_list = strategies_set, strategies = strategies_all, highlight_index = highlight_index_set)