def score(full):
    """Render three readability scores for ``full`` on the Streamlit page.

    For Flesch Reading Ease, SMOG Index and Dale-Chall (in that order) the
    numeric score is written as a header, followed by the metric's title and
    an explanatory legend.

    Parameters
    ----------
    full : str
        The text to score.
    """
    flesch_legend = (
        "90-100 Very Easy,70-79 Fairly Easy,60-69 Standard,"
        "50-59Fairly Difficult,30-49 Difficult,0-29 Very Confusing "
    )
    smog_legend = (
        "Returns the SMOG index of the given text.This is a grade formula in "
        "that a score of 9.3 means that a ninth "
        "grader would be able to read the document.Texts of fewer than 30 "
        "sentences are statistically invalid, "
        "because the SMOG formula was normed on 30-sentence samples. textstat "
        "requires at least 3 sentences for a "
        "result. "
    )
    dale_chall_legend = (
        "Different from other tests, since it uses a lookup table of the most "
        "commonly used 3000 English words. Thus it returns the grade level "
        "using the New Dale-Chall Formula. "
        "4.9 or lower average 4th-grade student or lower "
        "5.0–5.9 average 5th or 6th-grade student "
        "6.0–6.9 average 7th or 8th-grade student "
        "7.0–7.9 average 9th or 10th-grade student "
        "8.0–8.9 average 11th or 12th-grade student "
        "9.0–9.9 average 13th to 15th-grade (college) student"
    )

    # (textstat function name, title shown under the score, legend text)
    panels = (
        ('flesch_reading_ease', 'Flesch Reading Ease Score', flesch_legend),
        ('smog_index', 'Smog Index Score', smog_legend),
        ('dale_chall_readability_score', 'Dale Chall Readability Score',
         dale_chall_legend),
    )
    # The legend's ``key`` is simply the 1-based position of the panel.
    for key, (metric, title, legend) in enumerate(panels, start=1):
        st.header(getattr(textstat, metric)(full))
        st.write(title)
        st.write(legend, key=key)
def get_stats(text):
    """Return a dict of textstat readability scores plus two raw counts.

    Parameters
    ----------
    text : str
        The document to analyse.

    Returns
    -------
    dict
        The textstat scores keyed by metric name, ``consolidated_score``
        (textstat's ``text_standard``), ``doc_length`` (``len(text)``,
        whitespace included) and ``quote_count`` (number of ``"``
        characters).
    """
    return {
        "flesch_reading_ease": textstat.flesch_reading_ease(text),
        "smog_index": textstat.smog_index(text),
        "flesch_kincaid_grade": textstat.flesch_kincaid_grade(text),
        "coleman_liau_index": textstat.coleman_liau_index(text),
        "automated_readability_index":
            textstat.automated_readability_index(text),
        "dale_chall_readability_score":
            textstat.dale_chall_readability_score(text),
        "difficult_words": textstat.difficult_words(text),
        "linsear_write_formula": textstat.linsear_write_formula(text),
        "gunning_fog": textstat.gunning_fog(text),
        "consolidated_score": textstat.text_standard(text),
        # Character count of the raw text; spaces are not excluded.
        "doc_length": len(text),
        "quote_count": text.count('"'),
    }
def get_readability_score(text, metric="flesch"):
    """Score ``text`` with the readability metric named by ``metric``.

    Parameters
    ----------
    text : str
        Text to score. Typographic apostrophes (’) are replaced with ``'``
        before scoring.
    metric : str
        One of ``"flesch"``, ``"smog"``, ``"coleman_liau_index"``,
        ``"automated_readability_index"``,
        ``"dale_chall_readability_score"``, ``"linsear_write_formula"``,
        ``"gunning_fog"`` (all delegated to textstat,
        https://pypi.org/project/textstat/), ``"difficult_words"`` or
        ``"avg_word_length"`` (both computed here with the module-level
        tokenizer ``tknzr``).

    Returns
    -------
    number or None
        The score; ``0`` for the two tokenizer-based metrics when the text
        yields no tokens; ``None`` when ``metric`` is not recognised.
    """
    text = text.replace("’", "'")

    # Public metric name -> name of the textstat function implementing it.
    textstat_metrics = {
        "flesch": "flesch_reading_ease",
        "smog": "smog_index",
        "coleman_liau_index": "coleman_liau_index",
        "automated_readability_index": "automated_readability_index",
        "dale_chall_readability_score": "dale_chall_readability_score",
        "linsear_write_formula": "linsear_write_formula",
        "gunning_fog": "gunning_fog",
    }
    if metric in textstat_metrics:
        return getattr(textstat, textstat_metrics[metric])(text)

    if metric == "difficult_words":
        # Percentage of distinct lower-cased tokens that are at least six
        # characters long and absent from EASY_WORDS.
        vocabulary = set(tknzr.tokenize(text.lower()))
        if not vocabulary:
            # No tokens at all: avoid dividing by zero.
            return 0
        nb_difficult = sum(
            1 for w in vocabulary if w not in EASY_WORDS and len(w) >= 6)
        return 100 * nb_difficult / len(vocabulary)

    if metric == "avg_word_length":
        words = [w for w in tknzr.tokenize(text)
                 if w not in misc_utils.PUNCT]
        if not words:
            return 0
        return np.average([len(w) for w in words])

    # Unknown metric: keep the historical "no result" behaviour.
    return None
def readability(queries):
    """Compute textstat readability scores for every text in ``queries``.

    Parameters
    ----------
    queries : iterable of str
        Texts to score, one entry per text.

    Returns
    -------
    dict of str -> list
        One list per metric, each holding one score per query in input
        order. ``'Text Standard'`` uses ``float_output=True``.
    """
    columns = ('Flesch', 'Smog', 'Flesch grade', 'Coleman', 'Automated',
               'Dale', 'Difficult', 'Linsear', 'Gunning', 'Text Standard')
    scores = {column: [] for column in columns}

    for line in queries:
        scores['Flesch'].append(textstat.flesch_reading_ease(line))
        scores['Smog'].append(textstat.smog_index(line))
        scores['Flesch grade'].append(textstat.flesch_kincaid_grade(line))
        scores['Coleman'].append(textstat.coleman_liau_index(line))
        scores['Automated'].append(
            textstat.automated_readability_index(line))
        scores['Dale'].append(textstat.dale_chall_readability_score(line))
        scores['Difficult'].append(textstat.difficult_words(line))
        scores['Linsear'].append(textstat.linsear_write_formula(line))
        scores['Gunning'].append(textstat.gunning_fog(line))
        scores['Text Standard'].append(
            textstat.text_standard(line, float_output=True))
    return scores
def readability_measures(self, as_dict=False):
    """
    Collect the BOFIR scores together with several classic readability
    formulas for the paragraph.

    Parameters
    ----------
    as_dict : boolean
        If True the measures are returned as a plain dict; otherwise
        (default) they are wrapped in a one-row DataFrame.

    Returns
    -------
    d : dict or DataFrame
        Keys/columns, in order: ``bofir_5cat``, ``bofir_3cat``, ``fog``,
        ``dale_chall``, ``smog``, ``flesch``. The DataFrame's single row is
        labelled ``readability_score``.
    """
    # Evaluate in the established order: flesch, the textstat formulas,
    # then the two BOFIR variants.
    measured = {
        'flesch': self.flesch,
        'smog': textstat.smog_index(self.paragraph),
        'dale_chall': textstat.dale_chall_readability_score(self.paragraph),
        'fog': textstat.gunning_fog(self.paragraph),
        'bofir_5cat': self.bofir(cat5=True),
        'bofir_3cat': self.bofir(cat5=False),
    }

    # Output ordering differs from evaluation ordering.
    column_order = ('bofir_5cat', 'bofir_3cat', 'fog', 'dale_chall',
                    'smog', 'flesch')
    d = {name: measured[name] for name in column_order}

    if as_dict:
        return d
    return pd.DataFrame(d, index=['readability_score'])
def getReadabilityMetrics(test_data):
    '''
    Return the readability metrics of an article given as plain text.

    Requires the textstat library. Each key of the returned dict is the
    name of the textstat function that produced its value.
    '''
    metric_names = (
        "flesch_reading_ease",
        "smog_index",
        "flesch_kincaid_grade",
        "coleman_liau_index",
        "automated_readability_index",
        "dale_chall_readability_score",
        "difficult_words",
        "linsear_write_formula",
        "gunning_fog",
        "text_standard",
    )
    return {name: getattr(textstat, name)(test_data)
            for name in metric_names}
def get_readibility(text, metric="flesch_kincaid_grade"):
    """
    Score the readability of ``text`` with the textstat function named by
    ``metric``.

    Supported metrics: flesch_kincaid_grade, flesch_reading_ease,
    smog_index, coleman_liau_index, automated_readability_index,
    dale_chall_readability_score, difficult_words, linsear_write_formula,
    gunning_fog and text_standard. For any other value an error message is
    printed and ``None`` is returned.

    Reference:
        https://chartbeat-labs.github.io/textacy/getting_started/quickstart.html
        https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests
    """
    supported = (
        "flesch_kincaid_grade",
        "flesch_reading_ease",
        "smog_index",
        "coleman_liau_index",
        "automated_readability_index",
        "dale_chall_readability_score",
        "difficult_words",
        "linsear_write_formula",
        "gunning_fog",
        "text_standard",
    )
    if metric not in supported:
        print("ERROR: Please select correct metric!")
        return None
    # Every supported metric name is also the textstat function name.
    return getattr(textstat, metric)(text)
def analyze():
    """Score the request body with textstat and return a JSON report.

    The raw request data is decoded as UTF-8 and stripped, then scored.
    Report keys are the textstat function names with underscores replaced
    by hyphens. The JSON response is passed through ``decorate_response``.
    """
    print(request)
    str_to_read = request.data.decode("utf-8").strip()

    metric_names = (
        "flesch_reading_ease",
        "smog_index",
        "flesch_kincaid_grade",
        "coleman_liau_index",
        "automated_readability_index",
        "dale_chall_readability_score",
        "difficult_words",
        "linsear_write_formula",
        "gunning_fog",
        "text_standard",
    )
    report = {
        name.replace("_", "-"): getattr(textstat, name)(str_to_read)
        for name in metric_names
    }
    return decorate_response(jsonify(report))
def read_metrics(text_clean):
    """Build a table of readability metrics from textstat and quanteda.

    Four scores come from textstat (``fog``, ``smog``, ``ari``,
    ``coleman_liau``). The remaining ``*_R``/``*_r`` entries are read by
    position from ``quanteda.textstat_readability(..., measure='all')`` and
    converted with ``float(<item>.r_repr())``.

    NOTE(review): the positional indices presumably correspond to the
    column order of quanteda's result -- confirm against the installed
    quanteda version before changing them.
    """
    table = {}

    # Python-side scores (the two Flesch variants are intentionally not
    # collected here).
    python_metrics = (
        ('fog', 'gunning_fog'),
        ('smog', 'smog_index'),
        ('ari', 'automated_readability_index'),
        ('coleman_liau', 'coleman_liau_index'),
    )
    for key, function_name in python_metrics:
        table[key] = getattr(textstat, function_name)(text_clean)

    r_read_mets = quanteda.textstat_readability(text_clean, measure='all')

    # (table key, position of the measure inside r_read_mets)
    r_positions = (
        ('ari_r', 1),
        ('rix_r', 35),
        ('Coleman_Liau_Grade_R', 9),
        ('Coleman_Liau_Short_R', 10),
        ('Danielson_Bryan_R', 14),
        ('Dickes_Steiwer_R', 16),
        ('ELF_R', 18),
        ('Farr_Jenkins_Paterson_R', 19),
        ('flesch_R', 20),
        ('flesh_kincaid_R', 22),
        ('FORCAST_R', 26),
        ('Fucks_R', 28),
        ('FOG_R', 23),
        ('Linsear_Write_R', 29),
        ('nWS_R', 31),
        ('SMOG_R', 37),
        ('Strain_R', 43),
        ('Wheeler_Smith_R', 46),
    )
    for key, position in r_positions:
        table[key] = float(r_read_mets[position].r_repr())

    return table
def score(text):
    """Return six textstat readability scores for ``text`` as a tuple.

    Order: flesch_reading_ease, flesch_kincaid_grade, gunning_fog,
    smog_index, coleman_liau_index, automated_readability_index.
    """
    metric_names = (
        'flesch_reading_ease',
        'flesch_kincaid_grade',
        'gunning_fog',
        'smog_index',
        'coleman_liau_index',
        'automated_readability_index',
    )
    return tuple(getattr(textstat, name)(text) for name in metric_names)
def do_datas():
    """Append the current document's statistics to the accumulator lists.

    Reads ``words``, ``vocab`` and ``contents`` plus the tone dictionaries
    (``lm_*``, ``gi_*``, ``hr_*``, ``gt_negation``) from the enclosing
    scope and appends exactly one value to each accumulator list:

    1. word count, 2. vocabulary size, 3. syllable count,
    4. sentence count, 5. tone counts, 6. readability scores.
    """
    # 1. number of words
    nw.append(len(words))

    # 2. vocabulary size
    nvocab.append(len(vocab))

    # 3. syllables
    nsyllable.append(textstat.syllable_count(contents))

    # 4. sentences
    nsentence.append(textstat.sentence_count(contents))

    # 5. tone
    # LM dictionary
    n_neg_lm.append(count_occurrence(words, lm_neg))
    n_pos_lm.append(count_occurrence(words, lm_pos))
    n_uctt_lm.append(count_occurrence(words, lm_uctt))
    n_lit_lm.append(count_occurrence(words, lm_lit))
    n_cstr_lm.append(count_occurrence(words, lm_cstr))
    n_modal1_lm.append(count_occurrence(words, lm_modal1))
    n_modal2_lm.append(count_occurrence(words, lm_modal2))
    n_modal3_lm.append(count_occurrence(words, lm_modal3))
    n_negation_lm.append(count_negation(words, lm_pos, gt_negation))

    # General Inquirer dictionary
    n_neg_gi.append(count_occurrence(words, gi_neg))
    n_pos_gi.append(count_occurrence(words, gi_pos))
    n_negation_gi.append(count_negation(words, gi_pos, gt_negation))

    # Henry dictionary
    n_neg_hr.append(count_occurrence(words, hr_neg))
    n_pos_hr.append(count_occurrence(words, hr_pos))
    # Negations are counted against the Henry positive list, mirroring the
    # LM and GI sections (this previously passed gi_pos by mistake).
    n_negation_hr.append(count_negation(words, hr_pos, gt_negation))

    # 6. readability
    fre_i = textstat.flesch_reading_ease(contents)
    if fre_i > 100:
        fre_i = 100  # cap at the top of the scale
    if fre_i < 0:
        fre_i = float('NaN')  # negative scores are recorded as missing
    fre.append(fre_i)

    fkg_i = textstat.flesch_kincaid_grade(contents)
    if fkg_i < 0:
        fkg_i = float('NaN')
    fkg.append(fkg_i)

    cl_i = textstat.coleman_liau_index(contents)
    if cl_i < 0:
        cl_i = float('NaN')
    cl.append(cl_i)

    # The remaining indices are stored unmodified.
    fog.append(textstat.gunning_fog(contents))
    ari.append(textstat.automated_readability_index(contents))
    smog.append(textstat.smog_index(contents))
def score(self, strText):
    """Compute textstat readability scores for ``strText`` and store them.

    Each score is stored on ``self`` under the textstat function name.
    Grade-style scores also get a ``str_<name>`` companion produced by
    ``self.grade``; Dale-Chall and Flesch Reading Ease get a
    ``' | <label>'`` companion chosen from fixed score bands.
    """
    def banded_label(value, bands, fallback):
        # Return the label of the first band whose lower bound is reached.
        for lower_bound, label in bands:
            if value >= lower_bound:
                return ' | ' + label
        return ' | ' + fallback

    dale_chall_bands = (
        (9.0, '13th to 15th grade (college)'),
        (8.0, '11th to 12th grade'),
        (7.0, '9th to 10th grade'),
        (6.0, '7th to 8th grade'),
        (5.0, '5th to 6th grade'),
    )
    flesch_bands = (
        (90, 'Very Easy'),
        (80, 'Easy'),
        (70, 'Fairly Easy'),
        (60, 'Standard'),
        (50, 'Fairly Difficult'),
        (30, 'Difficult'),
    )

    ari = textstat.automated_readability_index(strText)
    self.automated_readability_index = ari
    self.str_automated_readability_index = self.grade(ari)

    coleman_liau = textstat.coleman_liau_index(strText)
    self.coleman_liau_index = coleman_liau
    self.str_coleman_liau_index = self.grade(coleman_liau)

    dale_chall = textstat.dale_chall_readability_score(strText)
    self.dale_chall_readability_score = dale_chall
    self.str_dale_chall_readability_score = banded_label(
        dale_chall, dale_chall_bands, '4th grade or lower')

    self.difficult_words = textstat.difficult_words(strText)

    kincaid = textstat.flesch_kincaid_grade(strText)
    self.flesch_kincaid_grade = kincaid
    self.str_flesch_kincaid_grade = self.grade(kincaid)

    reading_ease = textstat.flesch_reading_ease(strText)
    self.flesch_reading_ease = reading_ease
    self.str_flesch_reading_ease = banded_label(
        reading_ease, flesch_bands, 'Very Confusing')

    fog = textstat.gunning_fog(strText)
    self.gunning_fog = fog
    self.str_gunning_fog = self.grade(fog)

    linsear = textstat.linsear_write_formula(strText)
    self.linsear_write_formula = linsear
    self.str_linsear_write_formula = self.grade(linsear)

    smog = textstat.smog_index(strText)
    self.smog_index = smog
    self.str_smog_index = self.grade(smog)

    self.text_standard = textstat.text_standard(strText)
def compute_readability_stats(text):
    """
    Compute reading statistics of the given text

    Reference: https://github.com/shivam5992/textstat

    Parameters
    ==========
    text: str, input section or abstract text

    Returns
    =======
    dict mapping each statistic name to its value. If computing any
    statistic raises an exception, every value is ``None`` instead
    (best-effort: one failure blanks the whole row).
    """
    # (output key, name of the textstat function that produces it)
    stat_sources = (
        ('flesch_reading_ease', 'flesch_reading_ease'),
        ('smog', 'smog_index'),
        ('flesch_kincaid_grade', 'flesch_kincaid_grade'),
        ('coleman_liau_index', 'coleman_liau_index'),
        ('automated_readability_index', 'automated_readability_index'),
        ('dale_chall', 'dale_chall_readability_score'),
        ('difficult_words', 'difficult_words'),
        ('linsear_write', 'linsear_write_formula'),
        ('gunning_fog', 'gunning_fog'),
        ('text_standard', 'text_standard'),
        ('n_syllable', 'syllable_count'),
        ('avg_letter_per_word', 'avg_letter_per_word'),
        ('avg_sentence_length', 'avg_sentence_length'),
    )
    try:
        readability_dict = {
            key: getattr(textstat, function_name)(text)
            for key, function_name in stat_sources
        }
    except Exception:
        # Deliberately broad so bad input never aborts a batch, but no
        # longer a bare ``except`` -- KeyboardInterrupt and SystemExit
        # must still propagate.
        readability_dict = {key: None for key, _ in stat_sources}
    return readability_dict
def getWordComplexityScore(self, tokens, i):
    """Return a textstat complexity score for ``tokens``, selected by ``i``.

    ``i == 1`` selects Gunning Fog, ``i == 2`` selects the SMOG index and
    any other value selects the Flesch-Kincaid grade. A higher score means
    a document takes a higher education level to read.
    """
    if i == 1:
        return textstat.gunning_fog(tokens)
    if i == 2:
        # Texts of fewer than 30 sentences are statistically invalid,
        # because the SMOG formula was normed on 30-sentence samples.
        # textstat requires at least 3 sentences per article for a result.
        return textstat.smog_index(tokens)
    return textstat.flesch_kincaid_grade(tokens)
def generate_score(self, text):
    """Compute readability grades for ``text`` and store them on ``self``.

    Flesch Reading Ease, Flesch-Kincaid and Dale-Chall each also get a
    ``*_consensus`` attribute from ``readability_test_consensus`` with the
    matching grading table. SMOG, ARI and Coleman-Liau are stored as raw
    scores only.
    """
    reading_ease = ts.flesch_reading_ease(text)
    self.flesch_reading_grade = reading_ease
    self.flesch_reading_grade_consensus = readability_test_consensus(
        reading_ease, flesch_ease_grading_system)

    kincaid = ts.flesch_kincaid_grade(text)
    self.flesch_kincaid_grade = kincaid
    self.flesch_kincaid_grade_consensus = readability_test_consensus(
        kincaid, us_grade_level_system_age)

    dale_chall = ts.dale_chall_readability_score(text)
    self.dale_chall_grade = dale_chall
    self.dale_chall_grade_consensus = readability_test_consensus(
        dale_chall, dale_chall_system)

    self.smog_grade = ts.smog_index(text)

    # An ARI consensus (against us_grade_level_system_level) is currently
    # disabled; only the raw score is kept.
    self.ari_grade = ts.automated_readability_index(text)

    self.coleman_liau_grade = ts.coleman_liau_index(text)
def process(self, df):
    """Add readability feature columns to ``df`` and pickle them.

    Parameters
    ----------
    df : DataFrame
        Must contain an ``articleBody`` text column. Twelve feature columns
        are added in place: nine textstat scores, the count of
        ``i``/``me``/``myself`` tokens, the count of ``, . ! ?`` tokens and
        the lexical diversity (distinct tokens / total tokens).

    Returns
    -------
    int
        Always ``1``.

    Side effects
    ------------
    Writes ``../saved_data/read.pkl`` containing two pickled objects: the
    list of feature names, then the feature matrix (``df[feats].values``).
    Progress and elapsed time are printed.
    """
    t0 = time()
    print("\n---Generating Readability Features:---\n")

    def lexical_diversity(text):
        # Ratio of distinct tokens to all tokens (lower-cased).
        words = nltk.tokenize.word_tokenize(text.lower())
        if not words:
            # An empty body has no tokens; report 0.0 instead of raising
            # ZeroDivisionError and aborting the whole run.
            return 0.0
        return len(set(words)) / len(words)

    def get_counts(text, word_list):
        # Number of tokens (lower-cased) that appear in word_list.
        targets = set(word_list)
        tokens = nltk.tokenize.word_tokenize(text.lower())
        return sum(1 for word in tokens if word in targets)

    # Column name == textstat function name for these features.
    textstat_features = [
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'difficult_words',
        'linsear_write_formula',
        'gunning_fog',
    ]
    for name in textstat_features:
        df[name] = df['articleBody'].map(getattr(textstat, name))

    df['i_me_myself'] = df['articleBody'].apply(
        get_counts, args=(['i', 'me', 'myself'],))
    df['punct'] = df['articleBody'].apply(
        get_counts, args=([',', '.', '!', '?'],))
    df['lexical_diversity'] = df['articleBody'].apply(lexical_diversity)

    feats = textstat_features + ['i_me_myself', 'punct', 'lexical_diversity']
    feature_matrix = df[feats].values

    with open('../saved_data/read.pkl', 'wb') as outfile:
        # -1 selects the highest pickle protocol available.
        pickle.dump(feats, outfile, -1)
        pickle.dump(feature_matrix, outfile, -1)
    print('readable features saved in read.pkl')

    print('\n---Readability Features is complete---')
    print("Time taken {} seconds\n".format(time() - t0))
    return 1
def readability_scores(self, text):
    """Compute textstat readability scores for ``text`` and store them.

    Each result is stored on ``self`` under the textstat function name,
    except the automated readability index, which is stored as ``ari``.
    """
    # (attribute on self, textstat function name), in evaluation order.
    assignments = (
        ('ari', 'automated_readability_index'),
        ('flesch_kincaid_grade', 'flesch_kincaid_grade'),
        ('coleman_liau_index', 'coleman_liau_index'),
        ('dale_chall_readability_score', 'dale_chall_readability_score'),
        ('flesch_reading_ease', 'flesch_reading_ease'),
        ('gunning_fog', 'gunning_fog'),
        ('linsear_write_formula', 'linsear_write_formula'),
        ('lix', 'lix'),
        ('rix', 'rix'),
        ('smog_index', 'smog_index'),
        ('text_standard', 'text_standard'),
    )
    for attribute, function_name in assignments:
        setattr(self, attribute, getattr(textstat, function_name)(text))
def get_readability_stats(text):
    """Return a dict of textstat readability scores for ``text``.

    Keys are the textstat function names. ``text_standard`` is requested
    with ``float_output=True``.
    """
    plain_metrics = (
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'linsear_write_formula',
        'gunning_fog',
    )
    stats = {name: getattr(textstat, name)(text) for name in plain_metrics}
    stats['text_standard'] = textstat.text_standard(text, float_output=True)
    return stats
def analyze_vocab(text):
    """Return word count and textstat readability scores for ``text``.

    ``num_words`` comes from ``textstat.lexicon_count``; every other key is
    the name of the textstat function that produced it. ``text_standard``
    is requested with ``float_output=True``.
    """
    results = {'num_words': textstat.lexicon_count(text)}

    plain_metrics = (
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'difficult_words',
        'linsear_write_formula',
        'gunning_fog',
    )
    for name in plain_metrics:
        results[name] = getattr(textstat, name)(text)

    results['text_standard'] = textstat.text_standard(text, float_output=True)
    return results
def vocab_check(text):
    """Score the vocabulary of ``text``.

    Returns a 3-tuple ``(vocab_results, diff_words, easy_word_dict)``:
    ``vocab_results`` is a dict of readability scores plus
    ``yule_vocab_richness`` and ``total_score`` (``text_standard`` with
    ``float_output=True``); the other two items are whatever
    ``difficult_words(text)`` returns as a pair.
    """
    # Construct the score dictionary one entry at a time.
    vocab_results = {}
    vocab_results['dale_chall_readability_score'] = \
        dale_chall_readability_score(text)
    vocab_results['smog_index'] = smog_index(text)
    vocab_results['gunning_fog'] = gunning_fog(text)
    vocab_results['flesch_reading_ease'] = flesch_reading_ease(text)
    vocab_results['flesch_kincaid_grade'] = flesch_kincaid_grade(text)
    vocab_results['linsear_write_formula'] = linsear_write_formula(text)
    vocab_results['coleman_liau_index'] = coleman_liau_index(text)
    vocab_results['automated_readability_index'] = \
        automated_readability_index(text)
    vocab_results['yule_vocab_richness'] = yule(text)
    vocab_results['total_score'] = text_standard(text, float_output=True)

    diff_words, easy_word_dict = difficult_words(text)
    return vocab_results, diff_words, easy_word_dict
def lisibilty(text):
    """Return a list of eleven textstat readability features for ``text``.

    ``text`` is converted with ``str()`` before scoring. Order of the
    result: syllable count (``lang='en_arabic'``), lexicon count (with
    ``removepunct=True``), sentence count, Flesch reading ease,
    Flesch-Kincaid grade, Gunning fog, SMOG index, automated readability
    index, Coleman-Liau index, Linsear Write formula, Dale-Chall score.
    """
    content = str(text)

    f_lis = [
        textstat.syllable_count(content, lang='en_arabic'),
        textstat.lexicon_count(content, removepunct=True),
        textstat.sentence_count(content),
    ]
    remaining_metrics = (
        'flesch_reading_ease',
        'flesch_kincaid_grade',
        'gunning_fog',
        'smog_index',
        'automated_readability_index',
        'coleman_liau_index',
        'linsear_write_formula',
        'dale_chall_readability_score',
    )
    for name in remaining_metrics:
        f_lis.append(getattr(textstat, name)(content))
    return f_lis
def getWordComplexityScore(tokens, i, article):
    """Return a textstat complexity score for ``tokens``, selected by ``i``.

    A higher score means a document takes a higher education level to read.

    Parameters
    ----------
    tokens : str
        The text to score (passed straight to textstat).
    i : int
        ``1`` selects Gunning Fog, ``2`` the SMOG index, anything else the
        Flesch-Kincaid grade.
    article : hashable
        Key into ``sentenceDictionary``, which holds the article's sentence
        count. Only consulted for the SMOG index.

    Returns
    -------
    float
        The selected score; ``0.0`` for SMOG when the article has fewer
        than three sentences.
    """
    if i == 1:
        return textstat.gunning_fog(tokens)

    if i == 2:
        # Texts of fewer than 30 sentences are statistically invalid,
        # because the SMOG formula was normed on 30-sentence samples.
        # textstat requires at least 3 sentences for a result.
        if sentenceDictionary[article] >= 3:
            return textstat.smog_index(tokens)
        return 0.0

    return textstat.flesch_kincaid_grade(tokens)
def textstat_stats(text):
    """Return eight textstat readability scores for ``text`` as a Series.

    The Series index is, in order: ``difficulty`` (Flesch reading ease),
    ``grade_difficulty`` (Flesch-Kincaid grade), ``gfog``, ``smog``,
    ``ari``, ``cli``, ``lwf`` and ``dcrs``.
    """
    # (index label, textstat function name), in evaluation order.
    labelled_metrics = (
        ('difficulty', 'flesch_reading_ease'),
        ('grade_difficulty', 'flesch_kincaid_grade'),
        ('gfog', 'gunning_fog'),
        ('smog', 'smog_index'),
        ('ari', 'automated_readability_index'),
        ('cli', 'coleman_liau_index'),
        ('lwf', 'linsear_write_formula'),
        ('dcrs', 'dale_chall_readability_score'),
    )
    values = [getattr(textstat, function_name)(text)
              for _, function_name in labelled_metrics]
    labels = [label for label, _ in labelled_metrics]
    return pd.Series(values, index=labels)
def get_readability_features(self):
    """Compute textstat readability features for ``self.raw_text``.

    The raw text is tokenized with ``text_tokenizer`` (URL replacement and
    sentence tokenization enabled). Each sentence's tokens are re-joined
    with spaces, one sentence per line, and the resulting string is scored.
    Results are stored as attributes on ``self``.
    """
    sent_tokens = text_tokenizer(self.raw_text,
                                 replace_url_flag=True,
                                 tokenize_sent_flag=True)
    # One space-joined sentence per line, each newline-terminated.
    sentences = ''.join(' '.join(sent) + '\n' for sent in sent_tokens)

    # (attribute on self, textstat function name), in evaluation order.
    features = (
        ('syllable_count', 'syllable_count'),
        ('flesch_reading_ease', 'flesch_reading_ease'),
        ('flesch_kincaid_grade', 'flesch_kincaid_grade'),
        ('fog_scale', 'gunning_fog'),
        ('smog', 'smog_index'),
        ('automated_readability', 'automated_readability_index'),
        ('coleman_liau', 'coleman_liau_index'),
        ('linsear_write', 'linsear_write_formula'),
        ('dale_chall_readability', 'dale_chall_readability_score'),
        ('text_standard', 'text_standard'),
    )
    for attribute, function_name in features:
        setattr(self, attribute, getattr(textstat, function_name)(sentences))
def score_text(self, test_data):
    """Return a dict of textstat readability scores for ``test_data``.

    Each key is the name of the textstat function that produced its value.
    """
    metric_names = (
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'difficult_words',
        'linsear_write_formula',
        'gunning_fog',
        'text_standard',
    )
    return {name: getattr(textstat, name)(test_data)
            for name in metric_names}
def generate_score(self, text):
    """Compute readability grades for ``text`` and store them on ``self``.

    The Flesch-Kincaid grade is taken from ``pyReadability`` rather than
    textstat, to sync with the value offered by MS Word. Flesch Reading
    Ease, Flesch-Kincaid and Dale-Chall each also get a ``*_consensus``
    attribute from ``readability_test_consensus`` with the matching grading
    table. SMOG, ARI and Coleman-Liau are stored as raw scores only.
    """
    r = pyReadability(text)

    reading_ease = ts.flesch_reading_ease(text)
    self.flesch_reading_grade = reading_ease
    self.flesch_reading_grade_consensus = readability_test_consensus(
        reading_ease, flesch_ease_grading_system)

    # Sync with value offered by MS Word (instead of
    # ts.flesch_kincaid_grade).
    kincaid = r.flesch_kincaid().score
    self.flesch_kincaid_grade = kincaid
    self.flesch_kincaid_grade_consensus = readability_test_consensus(
        kincaid, us_grade_level_system_age)

    dale_chall = ts.dale_chall_readability_score(text)
    self.dale_chall_grade = dale_chall
    self.dale_chall_grade_consensus = readability_test_consensus(
        dale_chall, dale_chall_system)

    self.smog_grade = ts.smog_index(text)

    # An ARI consensus (against us_grade_level_system_level) is currently
    # disabled; only the raw score is kept.
    self.ari_grade = ts.automated_readability_index(text)

    self.coleman_liau_grade = ts.coleman_liau_index(text)
def _extract_readability_scores(self, text: Text, scores=None) -> Dict: output = {} if scores == None or 'flesch_reading_ease' in scores: output['flesch_reading_ease'] = textstat.flesch_reading_ease(text) if scores == None or 'smog_index' in scores: output['smog_index'] = textstat.smog_index(text) if scores == None or 'flesch_kincaid_grade' in scores: output['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade( text) if scores == None or 'coleman_liau_index' in scores: output['coleman_liau_index'] = textstat.coleman_liau_index(text) if scores == None or 'automated_readability_index' in scores: output[ 'automated_readability_index'] = textstat.automated_readability_index( text) if scores == None or 'dale_chall_readability_score' in scores: output[ 'dale_chall_readability_score'] = textstat.dale_chall_readability_score( text) if scores == None or 'difficult_words' in scores: output['difficult_words'] = textstat.difficult_words(text) if scores == None or 'linsear_write_formula' in scores: output['linsear_write_formula'] = textstat.linsear_write_formula( text) if scores == None or 'gunning_fog' in scores: output['gunning_fog'] = textstat.gunning_fog(text) if scores == None or 'text_standard' in scores: output['text_standard'] = textstat.text_standard(text, float_output=True) return output
def text_analysis(test_data):
    """Print ten textstat readability measures for ``test_data``.

    One line per measure, formatted as ``<function name>: <value>``.
    """
    metric_names = (
        # Higher scores indicate material that is easier to read;
        # aim for > 60.0.
        'flesch_reading_ease',
        # Calculates US grade level.
        'smog_index',
        # Calculates US grade level.
        'flesch_kincaid_grade',
        # Coleman-Liau: calculates US grade level.
        'coleman_liau_index',
        # Calculates US grade level.
        'automated_readability_index',
        # 0.1579 (difficult words / words * 100) + 0.0496 (words / sentences)
        'dale_chall_readability_score',
        # Number of difficult words.
        'difficult_words',
        # Calculates the U.S. grade level of a text sample based on sentence
        # length and the number of words with three or more syllables.
        'linsear_write_formula',
        # The text can be understood by someone who left full-time
        # education at a later age than the index.
        'gunning_fog',
        # Calculates US grade level.
        'text_standard',
    )
    for name in metric_names:
        print(name + ': ' + str(getattr(textstat, name)(test_data)))
def print_readability(text_to_analyse, option='short'):
    """Print textstat readability scores for ``text_to_analyse``.

    Scores are printed only when ``option == 'all'``; for any other value
    (including the default ``'short'``) the function does nothing.
    """
    if option != 'all':
        return

    # (label printed before the score, textstat function name)
    # NOTE(review): the "flesch kinkaid" label lists 0-100 bands although
    # the value printed is the Flesch-Kincaid grade -- confirm the label.
    report_lines = (
        ("flesch (0-29: confusing, 30-59: Difficult, 60-69: Standard, 70-100: Easy): ",
         'flesch_reading_ease'),
        ("smog (years of education required): ", 'smog_index'),
        ("flesch kinkaid (70-100: Fairly Easy; 60-70: Plain English; 30-60: Fairly Difficult; 30-0: Very Difficult): ",
         'flesch_kincaid_grade'),
        ("coleman liau: ", 'coleman_liau_index'),
        ("auto read (1-4: 5-10 years age; 5-8: 10-14 y; 9-12: 14-18 y; 13-14: 18+): ",
         'automated_readability_index'),
        ("dale chall (< 5: kid; 5-8: scholar; 9-10: college): ",
         'dale_chall_readability_score'),
        ("difficult words: ", 'difficult_words'),
        ("linsear write: ", 'linsear_write_formula'),
        ("gunning fog (9-12: High-school; 13-17: College): ", 'gunning_fog'),
        ("text standard (estimated school grade level): ", 'text_standard'),
    )
    for label, function_name in report_lines:
        print(label, getattr(textstat, function_name)(text_to_analyse))
def test_smog_index(self):
    """The SMOG index of the ``long_test`` fixture must equal 11.2."""
    expected = 11.2
    self.assertEqual(expected, textstat.smog_index(self.long_test))
def test_smog_index():
    """The SMOG index of the module-level ``long_test`` text must be 11.2."""
    assert textstat.smog_index(long_test) == 11.2