def print_readability_metrics(text, file_name):
    """Print a short readability report (Flesch, Dale-Chall, SMOG) for *text*."""
    print(file_name, " readability metrics")
    scorers = (
        ("flesch reading ease: ", textstat.flesch_reading_ease),
        ("dale chall readability: ", textstat.dale_chall_readability_score),
        ("smog index: ", textstat.smog_index),
    )
    for label, scorer in scorers:
        print(label, scorer(text))
    print('------------------------------------------------')
def do_text_stats(self, text):
    """Compute a bundle of textstat readability metrics for *text*.

    Returns a dict of counts and readability scores. Originally only some
    metrics were guarded against TypeError; now all score metrics are handled
    consistently and report None on failure.

    Flesch Reading Ease interpretation:
      90-100: Very Easy / 80-89: Easy / 70-79: Fairly Easy / 60-69: Standard
      50-59: Fairly Difficult / 30-49: Difficult / 0-29: Very Confusing
    """
    def _safe(metric):
        # textstat raises TypeError on some degenerate inputs; report None instead.
        try:
            return metric(text)
        except TypeError:
            return None

    return {
        "syllable_count": textstat.syllable_count(text),
        "lexicon_count": textstat.lexicon_count(text, True),
        "sentence_count": textstat.sentence_count(text),
        "flesch_reading_ease": _safe(textstat.flesch_reading_ease),
        "flesch_kincaid_grade": _safe(textstat.flesch_kincaid_grade),
        "gunning_fog": _safe(textstat.gunning_fog),
        "smog_index": _safe(textstat.smog_index),
        "automated_readability_index": _safe(textstat.automated_readability_index),
        "coleman_liau_index": _safe(textstat.coleman_liau_index),
        "linsear_write_formula": _safe(textstat.linsear_write_formula),
        "dale_chall_readability_score": _safe(textstat.dale_chall_readability_score),
        "text_standard": _safe(textstat.text_standard),
    }
def text_analytics(text):
    """Return a 27-element feature list for *text*.

    Layout: [word count, sentence count, syllable count, six readability
    scores, then each score multiplied by each of the three counts].
    Returns None (implicitly, as the original did) when *text* has no
    sentences.
    """
    if textstat.sentence_count(text) == 0:
        return None
    words = textstat.lexicon_count(text)        # word count
    sentences = textstat.sentence_count(text)   # sentence count
    syllables = textstat.syllable_count(text)   # syllable count
    scores = [
        textstat.flesch_reading_ease(text),
        textstat.smog_index(text),
        textstat.gunning_fog(text),
        textstat.dale_chall_readability_score(text),
        textstat.automated_readability_index(text),
        textstat.coleman_liau_index(text),
    ]
    features = [words, sentences, syllables] + scores
    # Cross terms: every score scaled by every raw count, in score order.
    for score in scores:
        features.extend([words * score, sentences * score, syllables * score])
    return features
def get_readability(df2):
    """Return a copy of *df2* with textstat readability columns added.

    For the i-th object (text) column, adds columns like
    'flesch_reading_ease{i}', 'smog_index{i}', etc.
    """
    df = df2.copy()
    scorers = (
        ('flesch_reading_ease', textstat.flesch_reading_ease),
        ('smog_index', textstat.smog_index),
        ('flesch_kincaid_grade', textstat.flesch_kincaid_grade),
        ('coleman_liau_index', textstat.coleman_liau_index),
        ('automated_readability_index', textstat.automated_readability_index),
        ('dale_chall_readability_score', textstat.dale_chall_readability_score),
        ('difficult_words', textstat.difficult_words),
        ('linsear_write_formula', textstat.linsear_write_formula),
        ('gunning_fog', textstat.gunning_fog),
        ('text_standard', textstat.text_standard),
    )
    text_feats = df.select_dtypes(include=['object']).columns.values
    for i, col in enumerate(text_feats):
        for name, scorer in scorers:
            df['{}{}'.format(name, i)] = df[col].apply(scorer)
    return df
def compareContents():
    """Flask view: score the submitted poem and stash metrics in the session."""
    if request.method != "POST":
        return render_template('compareContents.html')
    line = request.form['poem']
    poem1 = request.form['poem1']  # read (KeyError if absent), not otherwise used here
    session['line'] = line
    # Compute all readability metrics and mirror them into the session.
    scores = {
        'fre': textstat.flesch_reading_ease(line),
        'smog': textstat.smog_index(line),
        'fkg': textstat.flesch_kincaid_grade(line),
        'dcr': textstat.dale_chall_readability_score(line),
        'gf': textstat.gunning_fog(line),
    }
    for key, value in scores.items():
        session[key] = value
    return render_template('compareContents.html', metrics=True, line=line,
                           fre=scores['fre'], smog=scores['smog'],
                           fkg=scores['fkg'], dcr=scores['dcr'], gf=scores['gf'])
def _get_reading_stats(no_code_text):
    """
    Returns reading level information
    :param no_code_text: String to analyse
    :return: list of details
    """
    group_by = 'Reading Level Analysis '
    results = []

    def _append(label, metric, catch=(), fallback=None):
        # Some textstat metrics sporadically raise (reason unclear upstream);
        # when they do, record the fallback placeholder instead.
        try:
            value = metric(no_code_text)
        except catch:
            value = fallback
        results.append(TextFeature(label, value, group_by))

    # higher is better, scale 0 to 100
    _append('Flesch Reading Ease', textstat.flesch_reading_ease)
    _append('Flesch-Kincaid Grade Level', textstat.flesch_kincaid_grade)
    _append('The Fog Scale (Gunning FOG formula)', textstat.gunning_fog,
            (IndexError,), "Undetermined")
    _append('The SMOG Index', textstat.smog_index,
            (IndexError,), "Undetermined")
    _append('Automated Readability Index', textstat.automated_readability_index)
    _append('The Coleman-Liau Index', textstat.coleman_liau_index)
    _append('Linsear Write Formula', textstat.linsear_write_formula,
            (IndexError,), "Undetermined")
    _append('Dale Chall Readability Score', textstat.dale_chall_readability_score,
            (IndexError,), "Undetermined")
    _append('Readability Consensus', textstat.readability_consensus,
            (TypeError, IndexError),
            "Undetermined; One of the tests above failed.")
    return results
def readability(text):
    """Print a labelled readability report for *text*.

    FIX: the original mixed Python-2 ``print str(...)`` statements with
    Python-3 ``print(...)`` calls; the former are a SyntaxError under
    Python 3, which the rest of this file targets.

    NOTE(review): the return value reads a module-level ``adjectives``
    collection that is defined elsewhere — confirm it is populated before
    this is called.
    """
    sections = (
        ("Readability\n=================================\n\n", None),
        ("Flesch Reading Ease\n________________________\n\n", textstat.flesch_reading_ease),
        ("Smog Index\n________________________\n\n", textstat.smog_index),
        ("Flesch Kincaid Grade\n________________________\n\n", textstat.flesch_kincaid_grade),
        ("Coleman Liau Index\n________________________\n\n", textstat.coleman_liau_index),
        ("ARI\n________________________\n\n", textstat.automated_readability_index),
        ("Dale Chall\n________________________\n\n", textstat.dale_chall_readability_score),
        ("Difficult Words\n________________________\n\n", textstat.difficult_words),
        ("Linsear Write Formula\n________________________\n\n", textstat.linsear_write_formula),
        ("Gunning Fog\n________________________\n\n", textstat.gunning_fog),
        ("Compiled Score\n_____________________________\n\n", textstat.text_standard),
    )
    for header, metric in sections:
        print(header)
        if metric is not None:
            print(str(metric(text)) + "\n")
    return len(adjectives)
def calculate_statistics(lyrics):
    """
    Calculates statistics based on the text_raw of the lyrics.
    :return: Annotated lyrics containing information about the songs
    """
    logging.info("Calculating Statistics")
    from textstat.textstat import textstat
    for idx, song in tqdm(enumerate(lyrics), total=len(lyrics)):
        try:
            raw = song["text_raw"]
            song["num_syllables"] = textstat.syllable_count(raw)
            song["num_words"] = textstat.lexicon_count(raw)
            song["num_sentences"] = textstat.sentence_count(raw)
            song["flesch_score"] = textstat.flesch_reading_ease(raw)
            song["flesch_kincaid_level"] = textstat.flesch_kincaid_grade(raw)
            song["fog_score"] = textstat.gunning_fog(raw)
            # FIX: this key stores a count of difficult words, but the
            # original called dale_chall_readability_score (a grade score).
            song["num_difficult_words"] = textstat.difficult_words(raw)
        except Exception as e:
            # Best-effort: a malformed song must not abort the whole batch.
            logging.error(
                "Something bad happened in the current song ! Skipping it... \n{}"
                .format(song))
            logging.exception(e)
    return lyrics
def f():
    """Score each line of abc.txt and save the results to Readability_Scores.xls.

    Fixes over the original: the input file is now closed via ``with``
    (it was leaked), and a dead ``smog_index`` computation that was never
    written to any column has been removed.
    """
    print("hello")
    book = xlwt.Workbook()
    worksheet = book.add_sheet('ReadabilityScore')
    headers = ("Gen_sent", "flesch_reading_ease", "flesch_kincaid_grade",
               "dale_chall_readability_score", "gunning_fog")
    for col, header in enumerate(headers):
        worksheet.write(0, col, header)
    with open('abc.txt') as infile:  # , encoding='utf-8')
        for row, line in enumerate(infile, start=1):
            worksheet.write(row, 0, line)
            worksheet.write(row, 1, textstat.flesch_reading_ease(line))
            worksheet.write(row, 2, textstat.flesch_kincaid_grade(line))
            worksheet.write(row, 3, textstat.dale_chall_readability_score(line))
            worksheet.write(row, 4, textstat.gunning_fog(line))
    book.save('Readability_Scores.xls')
def _calculate_scores(self, docs):
    """Return a list with one dict of readability/surface statistics per doc."""
    scorers = (
        ('chars', ts.char_count),
        ('words', ts.lexicon_count),
        ('sents', ts.sentence_count),
        # ('syllables', ts.syllable_count),
        ('avg_sent_length', ts.avg_sentence_length),
        ('avg_syllables_per_word', ts.avg_syllables_per_word),
        ('avg_letters_per_word', ts.avg_letter_per_word),
        ('flesch', ts.flesch_reading_ease),
        # ('smog', ts.smog_index),
        # ('coleman_liau', ts.coleman_liau_index),
        ('automated_readability', ts.automated_readability_index),
        # ('linsear', ts.linsear_write_formula),
        # ('difficult_words', ts.difficult_words),
        ('dale_chall', ts.dale_chall_readability_score),
        # ('gunning_fog', ts.gunning_fog),
        ('lix', ts.lix),
    )
    return [{name: scorer(doc) for name, scorer in scorers} for doc in docs]
def vecify(v):
    """Return a list of readability scores for text *v* (SMOG disabled upstream)."""
    scorers = (
        ts.flesch_reading_ease,
        # ts.smog_index,
        ts.flesch_kincaid_grade,
        ts.coleman_liau_index,
        ts.automated_readability_index,
        ts.dale_chall_readability_score,
        ts.difficult_words,
        ts.linsear_write_formula,
        ts.gunning_fog,
    )
    return [scorer(v) for scorer in scorers]
def all_trad_scores(text):
    """Return the eight traditional readability scores of *text* as a list."""
    scorers = (
        textstat.flesch_reading_ease,
        textstat.flesch_kincaid_grade,
        textstat.smog_index,
        textstat.coleman_liau_index,
        textstat.automated_readability_index,
        textstat.dale_chall_readability_score,
        textstat.linsear_write_formula,
        textstat.gunning_fog,
    )
    return [scorer(text) for scorer in scorers]
def reading_difficulty(self):
    """Return (average grade level, difficult-word ratio) for self.text."""
    text = self.text
    diff_words = textstat.difficult_words(text) / self.nword
    grade_scores = (
        textstat.flesch_kincaid_grade(text),
        textstat.coleman_liau_index(text),
        textstat.automated_readability_index(text),
        textstat.dale_chall_readability_score(text),
        textstat.linsear_write_formula(text),
        textstat.gunning_fog(text) - 6,
        textstat.smog_index(text),
    )
    # NOTE(review): `max(..., 12)` FLOORS the result at 12 (it can never be
    # below 12). If the intent was to cap at grade 12, this should be `min`;
    # kept as-is to preserve behaviour — confirm with the author.
    avg_grade = max(math.ceil(sum(grade_scores) / 7), 12)
    return avg_grade, diff_words
def textstat_analysis(profile_text):
    """Return an 11-tuple of textstat metrics for *profile_text*.

    Order: flesch ease, smog, flesch-kincaid, coleman-liau, ARI, dale-chall,
    difficult words, linsear write, gunning fog, readability consensus,
    word count.
    """
    scorers = (
        textstat.flesch_reading_ease,
        textstat.smog_index,
        textstat.flesch_kincaid_grade,
        textstat.coleman_liau_index,
        textstat.automated_readability_index,
        textstat.dale_chall_readability_score,
        textstat.difficult_words,
        textstat.linsear_write_formula,
        textstat.gunning_fog,
        textstat.readability_consensus,
        textstat.lexicon_count,
    )
    return tuple(scorer(profile_text) for scorer in scorers)
def __readability_of_text(text, score="dale_chall"):
    """Return [Dale-Chall readability score] for *text*.

    :param text: the string to score.
    :param score: metric name; only "dale_chall" is supported. Any other
        string prints a notice and returns None (as before).
    :raises ValueError: when *score* is not a string.

    FIX: the original raised a bare ValueError, caught its own exception,
    printed, and re-raised; it also used ``type(x) == str``. Behaviour is
    preserved with a direct, idiomatic check.
    """
    if not isinstance(score, str):
        # Preserve the original diagnostic before propagating the error.
        print("the score should be of type str. You put " + str(type(score)))
        raise ValueError
    if score == "dale_chall":
        return [ts.dale_chall_readability_score(text)]
    print('Other scores are not supported yet. You wanted: ' + score +
          " we have only dale_chall")
def get_readability(contents):
    """Return a 10-element list of textstat metrics for *contents*."""
    scorers = (
        textstat.flesch_reading_ease,
        textstat.smog_index,
        textstat.flesch_kincaid_grade,
        textstat.automated_readability_index,
        textstat.dale_chall_readability_score,
        textstat.difficult_words,
        textstat.linsear_write_formula,
        textstat.gunning_fog,
        textstat.coleman_liau_index,
        textstat.text_standard,
    )
    readability = []
    for scorer in scorers:
        readability.append(scorer(contents))
    return readability
def FindDifficulty(df, TgtLang, SrcLang):
    """Add a 'Difficulty' column (Dale-Chall score) to *df* for the English side.

    Scores the target sentences when TgtLang is English, otherwise the source
    sentences when SrcLang is English. Returns the error string unchanged when
    neither side is English (preserving the original contract).

    FIX: removed the dead ``df['Difficulty'] = ''`` pre-assignment that was
    immediately overwritten.
    """
    if TgtLang == 'eng':
        col = 'TgtSent'
    elif SrcLang == 'eng':
        col = 'SrcSent'
    else:
        return 'Error Message Here'
    df['Difficulty'] = [
        float(textstat.dale_chall_readability_score(x)) for x in df[col]
    ]
    return df
def analyze_one(self, email):
    """ Analyzes a single email and stores results. """
    # Record at least one sentence so downstream averages never divide by 0.
    sentences = tstat.sentence_count(email)
    self.sent_count.append(max(sentences, 1))
    if email and len(email) > 0:
        metric_stores = (
            (self.flesch_kincaid_grade, tstat.flesch_kincaid_grade),
            (self.automated_readability_index, tstat.automated_readability_index),
            (self.coleman_liau_index, tstat.coleman_liau_index),
            (self.linsear_write_formula, tstat.linsear_write_formula),
            (self.dale_chall_readability_score, tstat.dale_chall_readability_score),
        )
        for store, metric in metric_stores:
            store.append(metric(email))
def readability_of_text(self, score="dale_chall"):
    """Compute and store self.readability (Dale-Chall score of self.string).

    :param score: metric name; only "dale_chall" is supported. Any other
        string prints a notice (as before).
    :raises ValueError: when *score* is not a string.

    FIXES: typo in the diagnostic ("shuld" -> "should"); replaced the
    raise-catch-reraise of its own ValueError and ``type(x) == str`` with a
    direct, idiomatic check. Behaviour is otherwise unchanged.
    """
    if not isinstance(score, str):
        print("the score should be of type str. You put " + str(type(score)))
        raise ValueError
    if score == "dale_chall":
        self.readability = ts.dale_chall_readability_score(self.string)
        print(self.readability)
    else:
        print('Other scores are not supported yet. You wanted: ' + score +
              " we have only dale_chall")
def main():
    """For each CLI path argument, write textstat metrics to <path>.readability.snip."""
    metrics = (
        ('syllable_count', textstat.syllable_count),
        ('lexicon_count', textstat.lexicon_count),
        ('sentence_count', textstat.sentence_count),
        ('difficult_words', textstat.difficult_words),
        ('flesch_reading_ease', textstat.flesch_reading_ease),
        ('flesch_kincaid_grade', textstat.flesch_kincaid_grade),
        ('smog_index', textstat.smog_index),
        ('automated_readability_index', textstat.automated_readability_index),
        ('coleman_liau_index', textstat.coleman_liau_index),
        ('linsear_write_formula', textstat.linsear_write_formula),
        ('dale_chall_readability_score', textstat.dale_chall_readability_score),
    )
    for arg in sys.argv[1:]:
        with open(arg) as src:
            text = src.read()
        with open(arg + '.readability.snip', 'w') as out:
            for name, metric in metrics:
                out.write("%s : %s\n" % (name, metric(text)))
def analyse_readbility(self, issue):
    """Analyse the readability of an issue body.

    :issue: the issue whose body is scored.
    :returns: (metrics dict with 'flesch'/'ari'/'dale-chall', message or None).
        For an empty body, sentinel scores (-1, 100, 100) and a fixed message
        are returned.

    FIX: removed the second ``if not issue.body`` block, which was unreachable
    — the identical guard above it already returns.
    """
    gfm = GithubMarkdown(issue.body)
    str_markdown = gfm.parse(issue.body)
    str_text = self.markdown_to_text(str_markdown)
    dic_test_readbility = dict()
    if not issue.body:
        message = (" - [ ] To improve the text in issue body.\n")
        dic_test_readbility['flesch'] = -1
        dic_test_readbility['ari'] = 100
        dic_test_readbility['dale-chall'] = 100
        return (dic_test_readbility, message)
    # Flesch Reading Ease Score
    dic_test_readbility['flesch'] = textstat.flesch_reading_ease(str_text)
    # Automated Readability Index (ARI)
    dic_test_readbility['ari'] = textstat.automated_readability_index(str_text)
    # Dale-Chall Readability Score
    dic_test_readbility['dale-chall'] = \
        textstat.dale_chall_readability_score(str_text)
    if self._has_low_readbility(dic_test_readbility):
        message = ' - [ ] To improve the readability of the text.\n'
    else:
        message = None
    return (dic_test_readbility, message)
def scores_cal_ori(text):
    """Return a 14-tuple of textstat counts and readability scores for *text*.

    Order: char count, lexicon count, syllable count, sentence count,
    avg sentence length, avg syllables/word, avg letters/word,
    avg sentences/word, flesch-kincaid grade, smog, gunning fog,
    difficult words, dale-chall, polysyllable count.

    FIX: removed an unreachable second ``return smog_index_value`` that
    followed the main return statement.
    """
    return (
        textstat.char_count(text, ignore_spaces=True),
        textstat.lexicon_count(text, removepunct=True),
        textstat.syllable_count(text),
        textstat.sentence_count(text),
        textstat.avg_sentence_length(text),
        textstat.avg_syllables_per_word(text),
        textstat.avg_letter_per_word(text),
        textstat.avg_sentence_per_word(text),
        textstat.flesch_kincaid_grade(text),
        textstat.smog_index(text),
        textstat.gunning_fog(text),
        textstat.difficult_words(text),
        textstat.dale_chall_readability_score(text),
        textstat.polysyllabcount(text),
    )
def calculate2FormulaFromFile(inputFile, isTEI=1):
    """Compute Flesch-Kincaid grade and Dale-Chall score for a (TEI) file.

    :returns: (inputFile, fkg, dale_chall); -1 marks a metric that failed.

    FIX: replaced bare ``except:`` clauses (which also swallow SystemExit and
    KeyboardInterrupt) with ``except Exception``.
    """
    inputData = extractText.extractTextTEI(inputFile, isTEI)
    # TEI extraction leaves underscores in place of spaces; undo that.
    inputData = re.sub('_', ' ', inputData)
    try:
        r1 = textstat.flesch_kincaid_grade(inputData)
    except Exception:
        print('ERROR: cannot calculate flesch_kincaid_grade for ', inputFile)
        r1 = -1
    try:
        r2 = textstat.dale_chall_readability_score(inputData)
    except Exception:
        print('ERROR: cannot calculate dale_chall_readability_score for ', inputFile)
        r2 = -1
    print('processing file', inputFile, 'complete')
    return (inputFile, r1, r2)
def run_textstat(text):
    """Return a 10-tuple of textstat metrics for *text*.

    Order: flesch ease, smog, flesch-kincaid grade, coleman-liau, ARI,
    dale-chall, difficult words, linsear write, gunning fog, text standard.
    """
    scorers = (
        textstat.flesch_reading_ease,
        textstat.smog_index,
        textstat.flesch_kincaid_grade,
        textstat.coleman_liau_index,
        textstat.automated_readability_index,
        textstat.dale_chall_readability_score,
        textstat.difficult_words,
        textstat.linsear_write_formula,
        textstat.gunning_fog,
        textstat.text_standard,
    )
    return tuple(scorer(text) for scorer in scorers)
def calculate2FormulaFromFile(inputFile, isTEI=1):
    """Compute Flesch-Kincaid grade and Dale-Chall score for a (TEI) file.

    :returns: (inputFile, fkg, dale_chall); -1 marks a metric that failed.

    FIX: replaced bare ``except:`` clauses (which also swallow SystemExit and
    KeyboardInterrupt) with ``except Exception``.
    """
    inputData = extractText.extractTextTEI(inputFile, isTEI)
    # TEI extraction leaves underscores in place of spaces; undo that.
    inputData = re.sub('_', ' ', inputData)
    try:
        r1 = textstat.flesch_kincaid_grade(inputData)
    except Exception:
        print('ERROR: cannot calculate flesch_kincaid_grade for ', inputFile)
        r1 = -1
    try:
        r2 = textstat.dale_chall_readability_score(inputData)
    except Exception:
        print('ERROR: cannot calculate dale_chall_readability_score for ', inputFile)
        r2 = -1
    print('processing file', inputFile, 'complete')
    return (inputFile, r1, r2)
def dale_chall_readability_score(text):
    """Map the Dale-Chall readability score of *text* onto levels 0-7.

    Level 1 corresponds to grade 4 and below, level 7 to college level;
    level 0 is reserved for non-positive scores.

    FIX: the original banding had gaps ([4.9, 5), [5.9, 6), ..., [9.9, 10))
    that silently fell through to level 0; the bands are now contiguous,
    matching the standard Dale-Chall grade table.
    """
    score = textstat.dale_chall_readability_score(text)
    if score <= 0:
        return 0
    # Upper bounds of levels 1..6; anything >= 10 is level 7.
    for level, upper in enumerate((5, 6, 7, 8, 9, 10), start=1):
        if score < upper:
            return level
    return 7
def score_statements(filename=DEFAULT_FILENAME, loglevel=logging.INFO, database=DB_PATH):
    """Attach sentiment and readability Scores to every Statement.

    :returns: index of the last statement processed, or -1 when there are none.

    FIX: ``i`` is now initialised before the loop — previously an empty
    statement table raised NameError at ``return i``.
    """
    sia = SentimentIntensityAnalyzer()
    i = -1
    for i, statement in enumerate(Statement.objects.iterator()):
        s = sia.polarity_scores(statement.text)
        score = Score(positive=s['pos'], negative=s['neg'], neutral=s['neu'],
                      compound=s['compound'], intensity=abs(s['compound']))
        words = statement.text.split()
        if len(words) and any(words):
            superficial_measures = getmeasures(words)
            score.flesch = superficial_measures['readability grades']['FleschReadingEase']
            score.kincaid = superficial_measures['readability grades']['Kincaid']
            score.dale_chall = textstat.dale_chall_readability_score(statement.text)
        else:
            # Empty text: zero out the readability fields.
            score.flesch = 0
            score.kincaid = 0
            score.dale_chall = 0
        score.save()
        statement.score = score
        statement.save()
        print(statement.score)
    return i
def lambda_handler(event, context):
    """AWS Lambda entry point: return textstat metrics for event['text']."""
    text = event['text']
    scorers = (
        ('flesch_reading_ease', textstat.flesch_reading_ease),
        ('smog_index', textstat.smog_index),
        ('flesch_kincaid_grade', textstat.flesch_kincaid_grade),
        ('coleman_liau_index', textstat.coleman_liau_index),
        ('automated_readability_index', textstat.automated_readability_index),
        ('dale_chall_readability_score', textstat.dale_chall_readability_score),
        ('difficult_words', textstat.difficult_words),
        ('linsear_write_formula', textstat.linsear_write_formula),
        ('gunning_fog', textstat.gunning_fog),
        ('text_standard', textstat.text_standard),
    )
    response = {name: scorer(text) for name, scorer in scorers}
    return respond(None, response)
def get_feat_readability_metrics(self):
    # https://github.com/shivam5992/textstat
    """Return ([9 readability scores], False), or (MISSING_FEATURE * 9, True)
    when scoring the scraped body fails (the error is logged)."""
    try:
        body = self.webscrap.get_body()
        scorers = (
            textstat.flesch_reading_ease,
            textstat.smog_index,
            textstat.flesch_kincaid_grade,
            textstat.coleman_liau_index,
            textstat.automated_readability_index,
            textstat.dale_chall_readability_score,
            textstat.difficult_words,
            textstat.linsear_write_formula,
            textstat.gunning_fog,
            # textstat.text_standard intentionally disabled
        )
        return [scorer(body) for scorer in scorers], False
    except Exception as e:
        config.logger.error(repr(e))
        return MISSING_FEATURE * 9, True
def feature_readability(essay):
    """Return 10 readability features of *essay* as a tuple."""
    return (
        textstat.syllable_count(essay),                # syllable count
        textstat.flesch_reading_ease(essay),           # ease of reading, 0-100
        textstat.smog_index(essay),                    # SMOG readability grade
        textstat.flesch_kincaid_grade(essay),          # US school grade level
        textstat.coleman_liau_index(essay),            # grade level of the text
        textstat.automated_readability_index(essay),   # grade needed to understand
        textstat.dale_chall_readability_score(essay),  # grade level, common-word based
        textstat.difficult_words(essay),               # number of difficult words
        textstat.linsear_write_formula(essay),         # grade level of the text
        textstat.gunning_fog(essay),                   # FOG reading-difficulty index
    )
def analyseText():
    """Flask endpoint: JSON {'inputText': ...} -> readability metrics (200),
    or 'Missing values' (400) when the key is absent."""
    values = request.get_json()
    required = ['inputText']
    if not all(k in values for k in required):
        return 'Missing values', 400
    text = values['inputText']
    scorers = (
        ('syllable_count', textstat.syllable_count),
        ('lexicon_count', textstat.lexicon_count),
        ('sentence_count', textstat.sentence_count),
        ('flesch_reading_ease', textstat.flesch_reading_ease),
        ('flesch_kincaid_grade', textstat.flesch_kincaid_grade),
        ('gunning_fog', textstat.gunning_fog),
        ('smog_index', textstat.smog_index),
        ('automated_readability_index', textstat.automated_readability_index),
        ('coleman_liau_index', textstat.coleman_liau_index),
        ('linsear_write_formula', textstat.linsear_write_formula),
        ('dale_chall_readability_score', textstat.dale_chall_readability_score),
    )
    result = {name: scorer(text) for name, scorer in scorers}
    return jsonify(result), 200
def calculate_readability_measures(id):
    """ Count the words in doc and update the document. """
    es = elasticsearch.Elasticsearch()
    source = es.get_source(index='beek', doc_type='page', id=id)
    content = source['content']
    try:
        scorers = (
            ('flesch', textstat.flesch_reading_ease),
            ('smog', textstat.smog_index),
            ('flesch_kincaid', textstat.flesch_kincaid_grade),
            ('coleman_liau', textstat.coleman_liau_index),
            ('readability', textstat.automated_readability_index),
            ('dale_chall', textstat.dale_chall_readability_score),
            ('difficult_words', textstat.difficult_words),
            ('linsear_write_formula', textstat.linsear_write_formula),
            ('gunning_fog', textstat.gunning_fog),
            ('consensus', textstat.readability_consensus),
        )
        measures = {name: scorer(content) for name, scorer in scorers}
        es.update(index='beek', doc_type='page', id=id,
                  body={'doc': {'measures': measures}}, refresh=True)
    except Exception:
        # Best-effort: scoring/update failures are deliberately ignored.
        pass
def calculate_readability_measures(id):
    """ Count the words in doc and update the document. """
    es = elasticsearch.Elasticsearch()
    source = es.get_source(index='beek', doc_type='page', id=id)
    text = source['content']
    try:
        # Compute each readability measure over the page content.
        measures = {
            'flesch': textstat.flesch_reading_ease(text),
            'smog': textstat.smog_index(text),
            'flesch_kincaid': textstat.flesch_kincaid_grade(text),
            'coleman_liau': textstat.coleman_liau_index(text),
            'readability': textstat.automated_readability_index(text),
            'dale_chall': textstat.dale_chall_readability_score(text),
            'difficult_words': textstat.difficult_words(text),
            'linsear_write_formula': textstat.linsear_write_formula(text),
            'gunning_fog': textstat.gunning_fog(text),
            'consensus': textstat.readability_consensus(text),
        }
    except Exception:
        # Best-effort: scoring failures are deliberately ignored.
        pass
    else:
        es.update(index='beek', doc_type='page', id=id,
                  body={'doc': {'measures': measures}}, refresh=True)
def process(data):
    """Build a 1x19 numpy feature vector (counts, punctuation tallies and
    readability scores) from the raw string *data*."""
    cleaned = data.lower().strip()
    original = data.strip()
    n = len(data)

    def tally(chars):
        # Sum of occurrences in *data* of each character in *chars*.
        return sum(data.count(c) for c in chars)

    fea1 = numOfWords(cleaned)
    fea2 = numOfChar(cleaned)
    fea3 = count(cleaned, string.punctuation)
    fea5 = numOfContUpperCase(original)
    fea4 = textstat.gunning_fog(data)
    fea6 = textstat.automated_readability_index(data)
    fea7 = textstat.linsear_write_formula(data)
    fea8 = textstat.difficult_words(data)
    fea9 = textstat.dale_chall_readability_score(data)
    fea10 = (tally("'.\",’‘”“") / n) * 1000   # quote/period density per 1000 chars
    fea11 = tally("1234567890")
    fea12 = tally("?!@#$%&")
    fea13 = tally(":;")
    fea14 = tally("—-_")
    # NOTE(review): fea15 rescales the already length-normalised fea10
    # (i.e. divides by len(data) twice); kept as-is to preserve behaviour —
    # confirm whether the raw quote count was intended here.
    fea15 = (fea10 / n) * 100
    fea16 = tally("()[]{}")
    fea17 = tally("*/")
    fea18 = data.count("?")
    fea19 = (fea10 + fea11 + fea12 + fea13 + fea14 + fea15 + fea16 +
             fea17 + fea18)
    return np.array([[fea1, fea2, fea3, fea5, fea4, fea6, fea7, fea8, fea9,
                      fea10, fea11, fea12, fea13, fea14, fea15, fea16, fea17,
                      fea18, fea19]])
def get_readability(self, corpus, type='ari'):
    """Return the requested readability metric for *corpus*.

    Unknown metric names yield None. The parameter name `type` (which shadows
    the builtin) and the misspelled keys 'flesch_kinciad' and
    'readability_conensus' are kept verbatim for caller compatibility.
    """
    dispatch = {
        'ari': textstat.automated_readability_index,
        'flesch': textstat.flesch_reading_ease,
        'smog': textstat.smog_index,
        'flesch_kinciad': textstat.flesch_kincaid_grade,
        'coleman': textstat.coleman_liau_index,
        'dale_chall': textstat.dale_chall_readability_score,
        'difficult_words': textstat.difficult_words,
        'linsear': textstat.linsear_write_formula,
        'gunning_fog': textstat.gunning_fog,
        'readability_conensus': textstat.readability_consensus,
    }
    scorer = dispatch.get(type)
    return scorer(corpus) if scorer is not None else None
def stats(self, text):
    """Return a dict of textstat readability scores and surface statistics."""
    scorers = (
        ('flesch_reading_ease', textstat.flesch_reading_ease),
        ('smog', textstat.smog_index),
        ('flesch kincaid', textstat.flesch_kincaid_grade),
        ('coleman Liau', textstat.coleman_liau_index),
        ('automated', textstat.automated_readability_index),
        ('dale chall', textstat.dale_chall_readability_score),
        ('difficult', textstat.difficult_words),
        ('linsear', textstat.linsear_write_formula),
        ('gunning_fog', textstat.gunning_fog),
        ('standard', textstat.text_standard),
        ('charcount', textstat.char_count),
        ('lexicon count', textstat.lexicon_count),
        ('syllable count', textstat.syllable_count),
        ('sentence count', textstat.sentence_count),
        ('avg sentence length', textstat.avg_sentence_length),
        ('avg_syllables_per_word', textstat.avg_syllables_per_word),
        ('avg_letter_per_word', textstat.avg_letter_per_word),
        ('avg_sentence_per_word', textstat.avg_sentence_per_word),
    )
    return {key: scorer(text) for key, scorer in scorers}
def updateData(self):
    """Populate and return self.data: a list of per-metric result lists.

    Index layout of self.data (append order is significant):
      0  VADER sentiment scores     [compound, neg, pos, neu]
      1  sentence/word/char counts  (8 raw textstat counts)
      2  Flesch Reading Ease        [score, grade, normalized]
      3  Flesch-Kincaid Grade       [score, grade, normalized]
      4  Gunning FOG                [score, grade, normalized]
      5  SMOG                       [score, grade, normalized]
      6  Automated Readability     [score, grade, normalized]
      7  Coleman-Liau               [score, grade, normalized]
      8  Linsear Write              [score, grade, normalized]
      9  Dale-Chall                 [score, grade, normalized]
      10 Overall (text_standard)    [score, grade, normalized]

    NOTE(review): assumes self.sid is an initialized VADER
    SentimentIntensityAnalyzer and self.data is a list — confirm in __init__
    (not visible in this chunk).
    """
    # Full polarity-score dict from VADER for the whole text.
    self.polscore = self.sid.polarity_scores(self.text)

    ##### INDEX 0: text sentiment #####
    # compound, negative, positive, neutral ratings (each 0.0 - 1.0)
    self.data.append([
        self.polscore['compound'], self.polscore['neg'],
        self.polscore['pos'], self.polscore['neu']
    ])

    ##### INDEX 1: sentence info #####
    # [0] sentence count        [1] avg sentence length
    # [2] syllable count        [3] overall word count (len of self.splList)
    # [4] char count w/ spaces  [5] char count w/o spaces
    # [6] avg letters per word  [7] avg syllables per word
    self.data.append([
        textstat.sentence_count(self.text),
        textstat.avg_sentence_length(self.text),
        textstat.syllable_count(self.text),
        len(self.splList),
        textstat.char_count(self.text, False),
        textstat.char_count(self.text, True),
        textstat.avg_letter_per_word(self.text),
        textstat.avg_syllables_per_word(self.text)
    ])

    ##### INDEX 2: Flesch Reading Ease (scale 0 - 100) #####
    # Raw score is clamped into [0, 100]; normalized column inverts the
    # scale (abs(score - 100)) so higher means harder, like the others.
    self.freRaw = textstat.flesch_reading_ease(self.text)
    self.freStat = min(max(self.freRaw, 0), 100)
    self.data.append([
        round(self.freStat, 3),
        self.freGrade(self.freStat),
        round(abs(self.freStat - 100), 2)
    ])

    ##### INDEX 3: Flesch-Kincaid Grade (scale 0 - 18) #####
    # adjustScore presumably clamps into [0, 18] — confirm its definition.
    # Dividing by 0.18 expresses the clamped score as a percentage of 18.
    self.fkgRaw = textstat.flesch_kincaid_grade(self.text)
    self.fkgStat = self.adjustScore(self.fkgRaw)
    self.data.append([
        round(self.fkgStat, 3),
        self.grade(self.fkgStat),
        round(self.fkgStat / 0.18, 2)
    ])

    ##### INDEX 4: Gunning FOG index (scale 0 - 18) #####
    self.fogRaw = textstat.gunning_fog(self.text)
    self.fogStat = self.adjustScore(self.fogRaw)
    self.data.append([
        round(self.fogStat, 3),
        self.grade(self.fogStat),
        round(self.fogStat / 0.18, 2)
    ])

    ##### INDEX 5: SMOG index (scale 0 - 18) #####
    self.smogRaw = textstat.smog_index(self.text)
    self.smogStat = self.adjustScore(self.smogRaw)
    self.data.append([
        round(self.smogStat, 3),
        self.grade(self.smogStat),
        round(self.smogStat / 0.18, 2)
    ])

    ##### INDEX 6: Automated Readability Index (scale 0 - 14) #####
    # Clamped to [0, 14] here (not via adjustScore); ariGrade takes the
    # ceiling of the clamped score.
    self.ariRaw = textstat.automated_readability_index(self.text)
    self.ariStat = min(max(self.ariRaw, 0), 14)
    self.data.append([
        round(self.ariStat, 3),
        self.ariGrade(ceil(self.ariStat)),
        round(self.ariStat / 0.14, 2)
    ])  # 13

    ##### INDEX 7: Coleman-Liau Index (scale 0 - 18) #####
    self.cliRaw = textstat.coleman_liau_index(self.text)
    self.cliStat = self.adjustScore(self.cliRaw)
    self.data.append([
        round(self.cliStat, 3),
        self.grade(self.cliStat),
        round(self.cliStat / 0.18, 2)
    ])

    ##### INDEX 8: Linsear Write Index (scale 0 - 18) #####
    self.lwiRaw = textstat.linsear_write_formula(self.text)
    self.lwiStat = self.adjustScore(self.lwiRaw)
    self.data.append([
        round(self.lwiStat, 3),
        self.grade(self.lwiStat),
        round(self.lwiStat / 0.18, 2)
    ])

    ##### INDEX 9: Dale-Chall Readability Score (scale 0 - 10) #####
    self.dcrRaw = textstat.dale_chall_readability_score(self.text)
    self.dcrStat = min(max(self.dcrRaw, 0), 10)
    self.data.append([
        round(self.dcrStat, 3),
        self.daleChallGrade(self.dcrStat),
        round(self.dcrStat / 0.1, 2)
    ])

    ##### INDEX 10: overall score (scale 0 - 20) #####
    # text_standard(..., True) returns the consensus as a number;
    # the second call (default args) returns its string description.
    self.txtRaw = textstat.text_standard(self.text, True)
    self.txtStd = min(max(self.txtRaw, 0), 20)
    self.txtInfo = textstat.text_standard(self.text)
    self.data.append([
        round(self.txtStd, 3),
        self.txtGrade(self.txtStd, self.txtInfo),
        round(self.txtStd / 0.2, 2)
    ])
    return self.data
#! /usr/bin/env python from textstat.textstat import textstat import re raw_input("Please copy the lyrics to the two text files song1 and song 2. \nWhen complete hit enter to analyze.") print "" try: f = open('song1.txt') f_read = str(f.read()) cleaned = re.sub("[\(\[].*?[\)\]]", "", f_read) if textstat.dale_chall_readability_score(cleaned) < 5: print "Song #1 | Dale Chall Score: " + str(textstat.dale_chall_readability_score(cleaned)) print "Song #1 | " + "Easily understood by 4th-grade students or lower." f.close() elif textstat.dale_chall_readability_score(cleaned) < 6: print "Song #1 | Dale-Chall Score: " + str(textstat.dale_chall_readability_score(cleaned)) print "Song #1 | # of Difficult Words: " + str(textstat.difficult_words(cleaned)) print "Song #1 | " + "Easily understood by 5th-grade and 6th-grade students." f.close() elif textstat.dale_chall_readability_score(cleaned) < 7: print "Song #1 | Dale-Chall Score: " + str(textstat.dale_chall_readability_score(cleaned)) print "Song #1 | # of Difficult Words: " + str(textstat.difficult_words(cleaned)) print "Song #1 | " + "Easily understood by 7th-grade and 8th-grade students." f.close() elif textstat.dale_chall_readability_score(cleaned) < 8: print "Song #1 | Dale-Chall Score: " + str(textstat.dale_chall_readability_score(cleaned)) print "Song #1 | # of Difficult Words: " + str(textstat.difficult_words(cleaned)) print "Song #1 | " + "Easily understood by 9th-grade and 10th-grade students." f.close()
# NOTE(review): fragment — the lines up to `num_tweets += 1` continue a
# per-`tweet` loop whose `for` header, and the initialisation of the
# *_grades lists, *_total_grade accumulators, and the smog_index_grade /
# flesch_kincaid / gunning_fog values, appear above this chunk.
    # Accumulate the SMOG grade computed earlier in the loop body.
    smog_index_grades.append(smog_index_grade)
    smog_index_total_grade += smog_index_grade
    # Automated Readability Index for this tweet.
    ar_index_grade = textstat.automated_readability_index(tweet)
    ar_index_grades.append(ar_index_grade)
    ar_index_total_grade += ar_index_grade
    # Coleman-Liau index for this tweet.
    cl_index_grade = textstat.coleman_liau_index(tweet)
    cl_index_grades.append(cl_index_grade)
    cl_index_total_grade += cl_index_grade
    # Linsear Write Formula grade for this tweet.
    lwf_grade = textstat.linsear_write_formula(tweet)
    lwf_grades.append(lwf_grade)
    lwf_total_grade += lwf_grade
    # Dale-Chall readability score for this tweet.
    dcr_grade = textstat.dale_chall_readability_score(tweet)
    dcr_grades.append(dcr_grade)
    dcr_total_grade += dcr_grade
    num_tweets += 1

# avg grades: mean of each metric across all tweets.
# NOTE(review): raises ZeroDivisionError if no tweets were processed —
# confirm the caller guarantees num_tweets > 0.
avg_flesch_kincaid_grade = flesch_kincaid_total_grade / num_tweets
avg_gunning_fog_grade = gunning_fog_total_grade / num_tweets
avg_smog_index_grade = smog_index_total_grade / num_tweets
avg_ar_index_grade = ar_index_total_grade / num_tweets
avg_cl_index_grade = cl_index_total_grade / num_tweets
avg_lwf_grade = lwf_total_grade / num_tweets
avg_dcr_grade = dcr_total_grade / num_tweets
#main script if __name__ == '__main__': print "TextStat Comparison Script" print "--------------------------" #read in text from the command line #This needs to be fixed to deal/escape special characters textToCheck = raw_input("Please enter the text you would like to analyse: ") #read in text from a file- but what format? print "\n\n" print "Results" print "==============================================" print "==============================================\n" print "Syllable Count: " + str(textstat.syllable_count(textToCheck)) print "Lexicon Count: " + str(textstat.lexicon_count(textToCheck)) #TRUE is default and removes punctuation before counting print "Sentence Count: " + str(textstat.sentence_count(textToCheck)) print "Flesch Reading Ease formula: " + str(textstat.flesch_reading_ease(textToCheck)) print "Flesch-Kincaid Grade Level: " + str(textstat.flesch_kincaid_grade(textToCheck)) print "Fog Scale (Gunning FOG Formula): " + str(textstat.gunning_fog(textToCheck)) print "SMOG Index: " + str(textstat.smog_index(textToCheck)) print "Automated Readability Index: " + str(textstat.automated_readability_index(textToCheck)) print "Coleman-Liau Index: " + str(textstat.coleman_liau_index(textToCheck)) print "Linsear Write Formula: " + str(textstat.linsear_write_formula(textToCheck)) print "Dale-Chall Readability Score: " + str(textstat.dale_chall_readability_score(textToCheck)) print "--------------------------------------------------------------" print "Readability Consensus based upon all the above tests: " + str(textstat.text_standard(textToCheck)) print "\n\n"
#!/bin/python
"""Print 13 textstat readability metrics for a text file as one CSV line.

Usage: script.py <inputfile>

Output order: flesch_reading_ease, smog_index, flesch_kincaid_grade,
coleman_liau_index, automated_readability_index,
dale_chall_readability_score, difficult_words, linsear_write_formula,
gunning_fog, readability_consensus, syllable_count, lexicon_count,
sentence_count.
"""
import sys, string, os
from textstat.textstat import textstat

script_name = sys.argv[0]
# Fix: the original indexed sys.argv[1] unconditionally and crashed with a
# bare IndexError when no filename was supplied.
if len(sys.argv) < 2:
    sys.exit("usage: %s <inputfile>" % script_name)
inputfile = sys.argv[1]

# Concatenate every line (trailing whitespace stripped) into one string.
with open(inputfile) as myfile:
    test_data = "".join(line.rstrip() for line in myfile)

# Compute the metrics in output order; replaces 13 throwaway numbered
# variables (var1..var13) and a long string concatenation.
values = [
    textstat.flesch_reading_ease(test_data),
    textstat.smog_index(test_data),
    textstat.flesch_kincaid_grade(test_data),
    textstat.coleman_liau_index(test_data),
    textstat.automated_readability_index(test_data),
    textstat.dale_chall_readability_score(test_data),
    textstat.difficult_words(test_data),
    textstat.linsear_write_formula(test_data),
    textstat.gunning_fog(test_data),
    # NOTE(review): readability_consensus() exists only in old textstat
    # releases; modern versions renamed it text_standard(). Confirm the
    # pinned textstat version before changing this call.
    textstat.readability_consensus(test_data),
    textstat.syllable_count(test_data),
    # Second arg truthy -> strip punctuation before counting words.
    textstat.lexicon_count(test_data, 1),
    textstat.sentence_count(test_data),
]
print(','.join(str(v) for v in values))