def lex_readability(self, text, mode='fre'):
    """Compute one, or all, of the supported readability scores for ``text``.

    ``mode`` selects the metric:

    * ``'fre'`` (default) -- ``textstat.flesch_reading_ease``
    * ``'fog'`` -- ``textstat.gunning_fog``
    * ``'fkg'`` -- ``textstat.flesch_kincaid_grade``
    * ``'dcr'`` -- ``textstat.dale_chall_readability_score``
    * ``'text_std'`` -- ``textstat.text_standard`` with ``float_output=True``
    * ``'all'`` -- a 5-tuple of the scores above, in that order

    Any other ``mode`` is not an error: the function returns ``None``.
    """
    if mode == 'all':
        return (
            textstat.flesch_reading_ease(text),
            textstat.gunning_fog(text),
            textstat.flesch_kincaid_grade(text),
            textstat.dale_chall_readability_score(text),
            textstat.text_standard(text, float_output=True),
        )

    # Thunks keep the work lazy: only the requested metric is computed.
    single_metrics = (
        ('fre', lambda: textstat.flesch_reading_ease(text)),
        ('fog', lambda: textstat.gunning_fog(text)),
        ('fkg', lambda: textstat.flesch_kincaid_grade(text)),
        ('dcr', lambda: textstat.dale_chall_readability_score(text)),
        ('text_std', lambda: textstat.text_standard(text, float_output=True)),
    )
    for key, compute in single_metrics:
        if mode == key:
            return compute()
    return None
def test_text_standard():
    """Check the grade-band label ``text_standard`` gives the two sample texts.

    ``long_test`` and ``short_test`` are sample strings defined outside
    this block.
    """
    expected_labels = (
        (long_test, "9th and 10th grade"),
        (short_test, "2nd and 3rd grade"),
    )
    for sample, label in expected_labels:
        assert textstat.text_standard(sample) == label
def test_text_standard(self):
    """Check the grade-band label for the long and short fixture texts.

    Reads the samples from ``self.long_test`` and ``self.short_test``.
    """
    cases = (
        ("9th and 10th grade", self.long_test),
        ("2nd and 3rd grade", self.short_test),
    )
    for expected_label, sample in cases:
        self.assertEqual(expected_label, textstat.text_standard(sample))
def test_text_standard():
    """Check ``text_standard`` labels for both samples with language en_US.

    ``long_test`` and ``short_test`` are sample strings defined outside
    this block.
    """
    textstat.set_lang("en_US")

    expected_labels = {
        "9th and 10th grade": long_test,
        "2nd and 3rd grade": short_test,
    }
    for label, sample in expected_labels.items():
        assert textstat.text_standard(sample) == label
def get_readibility(text, metric="flesch_kincaid_grade"):
    """
    Return the readability score of ``text`` for the chosen ``metric``.

    ``metric`` must be one of the names listed in ``supported_metrics``
    below. For any other value an error line is printed and ``None`` is
    returned.

    Reference:
        https://chartbeat-labs.github.io/textacy/getting_started/quickstart.html
        https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests
    """
    supported_metrics = (
        "flesch_kincaid_grade",
        "flesch_reading_ease",
        "smog_index",
        "coleman_liau_index",
        "automated_readability_index",
        "dale_chall_readability_score",
        "difficult_words",
        "linsear_write_formula",
        "gunning_fog",
        "text_standard",
    )
    for name in supported_metrics:
        if metric == name:
            # Every supported metric name is also the name of the textstat
            # function that computes it.
            return getattr(textstat, name)(text)

    print("ERROR: Please select correct metric!")
    return None
def check_difficulty(self):
    """Build a plain-text difficulty report for ``self.textoutput``.

    The report lists the textstat grade-level label, the sentence count,
    the number of difficult words (from ``self.get_difficult_words``) and,
    for each entry of ``self.get_replacement_words``, one line of the form
    ``word -> alt1, alt2, ``.

    The result is stored in ``self.difficultyReport``. If any step raises,
    the report is set to ``"Error determining Difficulties"`` instead.
    """
    text = self.textoutput
    try:
        grade_level = textstat.text_standard(text)
        sentence_count = textstat.sentence_count(text)
        difficult_words = self.get_difficult_words(text)
        replacement_words = self.get_replacement_words(difficult_words)

        report_parts = [
            "Grade Level of Input Text: " + grade_level + "\n",
            "Sentence Count: " + str(sentence_count) + "\n",
            "Difficult Words Found: " + str(len(difficult_words)) + "\n",
            "Possible Replacements: " + "\n",
        ]
        for hard_word in replacement_words:
            suggestions = "".join(
                word + ", " for word in replacement_words[hard_word])
            report_parts.append(hard_word + " -> " + suggestions + "\n")

        self.difficultyReport = "".join(report_parts)
    except Exception:
        # Best-effort report: any analysis failure becomes a fixed message.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        self.difficultyReport = "Error determining Difficulties"
def calculate_stats(data_folder):
    """Print vocabulary and readability statistics for each dataset.

    For every key of the module-level ``dataset_fields`` mapping, loads
    ``<data_folder>/<dataset>/test.json``, takes the configured text field
    of each item (the last element when that field is a list), joins the
    texts with spaces and prints:

    * the textstat lexicon count,
    * unique-word counts from ``count_words`` (all / lowercase / uppercase)
      and the uppercase-to-all ratio,
    * textstat ``text_standard`` (as a float), Dale-Chall score and
      Flesch-Kincaid grade.

    Parameters
    ----------
    data_folder : str or path-like
        Folder containing one sub-folder per dataset.
    """
    data_folder = Path(data_folder)
    for dataset in dataset_fields:
        print(f"loading {dataset}")
        field = dataset_fields[dataset]["text"].strip()

        # Context manager so the file is closed even if parsing fails.
        with open(data_folder / dataset / "test.json") as handle:
            items = json.load(handle)

        sentences = [
            item[field][-1] if isinstance(item[field], list) else item[field]
            for item in items
        ]
        text = " ".join(sentences)

        lex_count = textstat.lexicon_count(text)
        print(lex_count)

        unique_words = count_words(text)
        print(f"all unique {len(unique_words)}")
        lower_unique_words = count_words(text, casing="lower")
        print(f"lowercase unique {len(lower_unique_words)}")
        upper_unique_words = count_words(text, casing="upper")
        print(f"uppercase unique {len(upper_unique_words)}")
        print(f"ratio {len(upper_unique_words) / len(unique_words)}")

        text_standard = textstat.text_standard(text, float_output=True)
        print(f"text_standard: {text_standard}")
        dale_chall_readability_score = textstat.dale_chall_readability_score(text)
        print(f"dale_chall_readability_score: {dale_chall_readability_score}")
        flesch_kincaid_grade = textstat.flesch_kincaid_grade(text)
        print(f"flesch_kincaid_grade: {flesch_kincaid_grade}")
def analyze():
    """Score the request body with textstat and return a JSON report.

    The raw request data is decoded as UTF-8 and stripped, each metric
    below is computed on it, and the resulting mapping is passed through
    ``jsonify`` and ``decorate_response``.
    """
    print(request)
    body_text = request.data.decode("utf-8").strip()

    metric_functions = (
        ("flesch-reading-ease", textstat.flesch_reading_ease),
        ("smog-index", textstat.smog_index),
        ("flesch-kincaid-grade", textstat.flesch_kincaid_grade),
        ("coleman-liau-index", textstat.coleman_liau_index),
        ("automated-readability-index", textstat.automated_readability_index),
        ("dale-chall-readability-score", textstat.dale_chall_readability_score),
        ("difficult-words", textstat.difficult_words),
        ("linsear-write-formula", textstat.linsear_write_formula),
        ("gunning-fog", textstat.gunning_fog),
        ("text-standard", textstat.text_standard),
    )
    report = {key: compute(body_text) for key, compute in metric_functions}
    return decorate_response(jsonify(report))
def readability(queries):
    """Compute textstat readability metrics for every query string.

    Parameters
    ----------
    queries : iterable of str
        Texts to score.

    Returns
    -------
    dict
        Maps each column name (``'Flesch'``, ``'Smog'``, ``'Flesch grade'``,
        ``'Coleman'``, ``'Automated'``, ``'Dale'``, ``'Difficult'``,
        ``'Linsear'``, ``'Gunning'``, ``'Text Standard'``) to a list holding
        one score per query, in input order. All lists are empty when
        ``queries`` is empty.
    """
    columns = (
        'Flesch', 'Smog', 'Flesch grade', 'Coleman', 'Automated', 'Dale',
        'Difficult', 'Linsear', 'Gunning', 'Text Standard',
    )
    # The previous version first built a pandas DataFrame with these columns
    # and then immediately overwrote it with this dict; that dead
    # construction is dropped.
    scores = {column: [] for column in columns}

    for line in queries:
        scores['Flesch'].append(textstat.flesch_reading_ease(line))
        scores['Smog'].append(textstat.smog_index(line))
        scores['Flesch grade'].append(textstat.flesch_kincaid_grade(line))
        scores['Coleman'].append(textstat.coleman_liau_index(line))
        scores['Automated'].append(textstat.automated_readability_index(line))
        scores['Dale'].append(textstat.dale_chall_readability_score(line))
        scores['Difficult'].append(textstat.difficult_words(line))
        scores['Linsear'].append(textstat.linsear_write_formula(line))
        scores['Gunning'].append(textstat.gunning_fog(line))
        scores['Text Standard'].append(
            textstat.text_standard(line, float_output=True))
    return scores
def evaluate(self, text: str):
    """Assemble the evaluation report for ``text``.

    Emotions come from ``self._emotion_detector`` and sentiment from
    ``self._google_nlp``. The returned dict contains:

    * ``clarity`` -- constant 5
    * ``text_general_level`` -- ``self._general_level_mapper`` looked up by
      the float textstat text standard
    * ``diversity`` -- ``self._uniqueness_mapper`` looked up by
      ``self.text_uniqueness(text) * 10``
    * ``tone``, ``emotional_tones``, ``who_do_you_look_like`` -- derived from
      the extracted emotions
    * ``speech_sentiment`` -- sentiment times 10, or 5 when absent
    * ``engagement`` -- emotion magnitude, or 5 when absent
    * ``calmness`` -- a random integer between 5 and 9 inclusive
    """
    emotions = self._emotion_detector.extract_emotions_from_raw_text(text)
    sentiment = self._google_nlp.extract_sentiment_from_raw_text(text)

    grade_level = textstat.text_standard(text, float_output=True)
    general_level = self._general_level_mapper[grade_level]
    diversity = self._uniqueness_mapper[self.text_uniqueness(text) * 10]
    tone = self.emotion_converter(emotions)
    emotional_tones = list(emotions.keys())

    if 'sentiment' in sentiment:
        speech_sentiment = sentiment['sentiment'] * 10
    else:
        speech_sentiment = 5

    if 'magnitude' in emotions:
        engagement = emotions['magnitude']
    else:
        engagement = 5

    look_alike = self.who_do_you_look_like(emotions)
    calmness = random.randint(5, 9)

    return {
        'clarity': 5,
        'text_general_level': general_level,
        'diversity': diversity,
        'tone': tone,
        'emotional_tones': emotional_tones,
        'speech_sentiment': speech_sentiment,
        'engagement': engagement,
        'who_do_you_look_like': look_alike,
        'calmness': calmness,
    }
def transform(self, X):
    """
    Transform X into a new dataset, Xprime and return it.

    ``X`` is converted to a ``pandas.DataFrame`` and must provide a
    ``'Comment'`` column of strings. The following feature columns are
    added:

    * ``%OfUpperCaseLetters`` -- percentage of upper-case characters,
      rounded to 2 decimals (``0.0`` for an empty comment)
    * ``NoOfURL`` -- number of URL-like matches
    * ``NoOfWords`` -- number of tokens from ``word_tokenize``
    * ``AvgSentenceLength`` -- ``textstat.avg_sentence_length``
    * ``TextStandard`` -- ``textstat.text_standard`` as a float

    The frame is also stored on ``self.X``.
    """
    X = pd.DataFrame(X)

    def countCaps(comment):
        """Return the percentage of upper-case characters in ``comment``."""
        if not comment:
            # An empty comment has no characters; avoid dividing by zero.
            return 0.0
        upper_count = sum(1 for c in comment if c.isupper())
        return round(upper_count * 100 / len(comment), 2)

    X['%OfUpperCaseLetters'] = X['Comment'].apply(countCaps)

    # Raw strings avoid invalid-escape warnings for `\/`, `\.` and `\s`.
    # The second alternative used to start with `[-zA-Z0-9]`, a typo for
    # `[a-zA-Z0-9]` (the class used by the other three alternatives).
    url_pattern = re.compile(
        r'https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}'
        r'|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}'
        r'|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}'
        r'|www\.[a-zA-Z0-9]+\.[^\s]{2,}'
    )
    X['NoOfURL'] = X['Comment'].apply(
        lambda comment: len(url_pattern.findall(comment)))
    X['NoOfWords'] = X['Comment'].apply(
        lambda comment: len(word_tokenize(comment)))
    X['AvgSentenceLength'] = X['Comment'].apply(
        lambda comment: textstat.avg_sentence_length(comment))
    X['TextStandard'] = X['Comment'].apply(
        lambda comment: textstat.text_standard(comment, float_output=True))

    self.X = X
    return self.X
def getReadabilityMetrics(test_data):
    '''
    Return the readability metrics of an article given as plain text.

    Requires the textstat library. The result is a dict keyed by metric
    name; each key is also the name of the textstat function that
    produced its value.
    '''
    metric_names = (
        "flesch_reading_ease",
        "smog_index",
        "flesch_kincaid_grade",
        "coleman_liau_index",
        "automated_readability_index",
        "dale_chall_readability_score",
        "difficult_words",
        "linsear_write_formula",
        "gunning_fog",
        "text_standard",
    )
    return {name: getattr(textstat, name)(test_data) for name in metric_names}
def get_stats(text):
    """Return textstat readability metrics plus two simple counts for ``text``.

    Besides the textstat scores, the dict holds ``doc_length`` (the
    character count, spaces included) and ``quote_count`` (the number of
    double-quote characters). The text-standard label is stored under
    ``consolidated_score``.
    """
    return {
        "flesch_reading_ease": textstat.flesch_reading_ease(text),
        "smog_index": textstat.smog_index(text),
        "flesch_kincaid_grade": textstat.flesch_kincaid_grade(text),
        "coleman_liau_index": textstat.coleman_liau_index(text),
        "automated_readability_index": textstat.automated_readability_index(text),
        "dale_chall_readability_score": textstat.dale_chall_readability_score(text),
        "difficult_words": textstat.difficult_words(text),
        "linsear_write_formula": textstat.linsear_write_formula(text),
        "gunning_fog": textstat.gunning_fog(text),
        "consolidated_score": textstat.text_standard(text),
        "doc_length": len(text),
        "quote_count": text.count('"'),
    }
def textStatistics(text):
    """
    Return ``(lexicon_count, text_standard)`` for ``text``.

    The lexicon count is taken with punctuation removed, and the text
    standard is requested as a float.
    """
    return (
        textstat.lexicon_count(text, removepunct=True),
        textstat.text_standard(text, float_output=True),
    )
def reading_standard(text):
    """Extract the grade numbers from textstat's text-standard label.

    ``textstat.text_standard`` returns a label such as
    ``"9th and 10th grade"``. This function returns the numbers found in
    it as a list of strings, e.g. ``['9', '10']``. If only a single
    ordinal is present, the second element is ``None``. If the label
    contains no ordinal at all, an empty list is returned.

    All English ordinal suffixes (``st``, ``nd``, ``rd``, ``th``) are
    recognised. The previous pattern only knew ``th``, so labels such as
    ``"2nd and 3rd grade"`` produced an empty result.
    """
    label = textstat.text_standard(text)
    # An optional leading minus sign is allowed on each number.
    match = re.search(
        r'(-?\d+)(?:st|nd|rd|th)(?:\s\w{3}\s(-?\d+)(?:st|nd|rd|th))?',
        label,
    )
    if not match:
        return []
    return [match.group(1), match.group(2)]
def getReadability(df):
    """Add readability columns computed from ``df.headline_text``.

    Adds ``ARI`` (automated readability index), ``DCR`` (Dale-Chall
    score) and ``TS`` (text standard as a float) to ``df`` in place and
    returns the same frame.
    """
    import textstat

    def text_standard_as_float(headline):
        return textstat.text_standard(headline, float_output=True)

    column_scorers = (
        ('ARI', textstat.automated_readability_index),
        ('DCR', textstat.dale_chall_readability_score),
        ('TS', text_standard_as_float),
    )
    for column, scorer in column_scorers:
        df[column] = df.headline_text.apply(scorer)
    return df
def score(self, strText):
    """Compute textstat metrics for ``strText`` and store them on ``self``.

    Each numeric metric is stored under its own attribute name. Most also
    get a companion ``str_<metric>`` attribute produced by ``self.grade``.
    For the Dale-Chall score and Flesch reading ease the companion is a
    ``' | <label>'`` string chosen from the threshold tables below.
    ``difficult_words`` and ``text_standard`` have no companion attribute.
    """
    # (minimum value, label) pairs, checked from the highest threshold down.
    dale_chall_bands = (
        (9.0, '13th to 15th grade (college)'),
        (8.0, '11th to 12th grade'),
        (7.0, '9th to 10th grade'),
        (6.0, '7th to 8th grade'),
        (5.0, '5th to 6th grade'),
    )
    reading_ease_bands = (
        (90, 'Very Easy'),
        (80, 'Easy'),
        (70, 'Fairly Easy'),
        (60, 'Standard'),
        (50, 'Fairly Difficult'),
        (30, 'Difficult'),
    )

    def band_label(value, bands, fallback):
        # First band whose threshold the value reaches wins; otherwise the
        # fallback label is used.
        for minimum, label in bands:
            if value >= minimum:
                return ' | ' + label
        return ' | ' + fallback

    self.automated_readability_index = textstat.automated_readability_index(
        strText)
    self.str_automated_readability_index = self.grade(
        self.automated_readability_index)

    self.coleman_liau_index = textstat.coleman_liau_index(strText)
    self.str_coleman_liau_index = self.grade(self.coleman_liau_index)

    self.dale_chall_readability_score = textstat.dale_chall_readability_score(
        strText)
    self.str_dale_chall_readability_score = band_label(
        self.dale_chall_readability_score, dale_chall_bands,
        '4th grade or lower')

    self.difficult_words = textstat.difficult_words(strText)

    self.flesch_kincaid_grade = textstat.flesch_kincaid_grade(strText)
    self.str_flesch_kincaid_grade = self.grade(self.flesch_kincaid_grade)

    self.flesch_reading_ease = textstat.flesch_reading_ease(strText)
    self.str_flesch_reading_ease = band_label(
        self.flesch_reading_ease, reading_ease_bands, 'Very Confusing')

    self.gunning_fog = textstat.gunning_fog(strText)
    self.str_gunning_fog = self.grade(self.gunning_fog)

    self.linsear_write_formula = textstat.linsear_write_formula(strText)
    self.str_linsear_write_formula = self.grade(self.linsear_write_formula)

    self.smog_index = textstat.smog_index(strText)
    self.str_smog_index = self.grade(self.smog_index)

    self.text_standard = textstat.text_standard(strText)
def compute_readability_stats(text):
    """
    Compute reading statistics of the given text

    Reference: https://github.com/shivam5992/textstat

    Parameters
    ==========
    text: str, input section or abstract text

    Returns
    =======
    dict mapping each statistic name to its value. The computation is
    all-or-nothing: if any statistic raises, every value in the returned
    dict is ``None``.
    """
    # (output key, textstat function name) -- the single source of truth for
    # both the populated result and the all-None fallback.
    stat_sources = (
        ('flesch_reading_ease', 'flesch_reading_ease'),
        ('smog', 'smog_index'),
        ('flesch_kincaid_grade', 'flesch_kincaid_grade'),
        ('coleman_liau_index', 'coleman_liau_index'),
        ('automated_readability_index', 'automated_readability_index'),
        ('dale_chall', 'dale_chall_readability_score'),
        ('difficult_words', 'difficult_words'),
        ('linsear_write', 'linsear_write_formula'),
        ('gunning_fog', 'gunning_fog'),
        ('text_standard', 'text_standard'),
        ('n_syllable', 'syllable_count'),
        ('avg_letter_per_word', 'avg_letter_per_word'),
        ('avg_sentence_length', 'avg_sentence_length'),
    )
    try:
        return {
            key: getattr(textstat, function_name)(text)
            for key, function_name in stat_sources
        }
    except Exception:
        # Best-effort by design. ``Exception`` (not a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        return {key: None for key, _ in stat_sources}
def _call_textstat(desc):
    '''
    Estimate the school grade level needed to understand a description,
    using the textstat package
    (reference: https://github.com/shivam5992/textstat).

    parameter desc[string]: description text

    Returns the first grade number of the text-standard label as an int
    (the leading token with its lowercase letters stripped). Raises
    TypeError when ``desc`` is not exactly a ``str``.
    '''
    if type(desc) is not str:
        raise TypeError(f"'desc' should be str but {type(desc)} detected")

    standard_label = textstat.text_standard(desc)
    leading_token = standard_label.split(' ')[0]
    return int(re.sub('[a-z]', '', leading_token))
def readability_scores(self, text):
    """Compute textstat readability metrics for ``text`` and store them on ``self``.

    Each metric is stored under an attribute named after its textstat
    function, except the automated readability index, which is stored as
    ``self.ari``.
    """
    # (attribute on self, textstat function name), in evaluation order.
    metric_sources = (
        ('ari', 'automated_readability_index'),
        ('flesch_kincaid_grade', 'flesch_kincaid_grade'),
        ('coleman_liau_index', 'coleman_liau_index'),
        ('dale_chall_readability_score', 'dale_chall_readability_score'),
        ('flesch_reading_ease', 'flesch_reading_ease'),
        ('gunning_fog', 'gunning_fog'),
        ('linsear_write_formula', 'linsear_write_formula'),
        ('lix', 'lix'),
        ('rix', 'rix'),
        ('smog_index', 'smog_index'),
        ('text_standard', 'text_standard'),
    )
    for attribute, function_name in metric_sources:
        setattr(self, attribute, getattr(textstat, function_name)(text))
def get_readability_stats(text):
    """Return a dict of textstat readability metrics for ``text``.

    ``text_standard`` is requested as a float; every other entry is the
    direct result of the textstat function with the same name as its key.
    """
    stats = {}
    stats['flesch_reading_ease'] = textstat.flesch_reading_ease(text)
    stats['smog_index'] = textstat.smog_index(text)
    stats['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(text)
    stats['coleman_liau_index'] = textstat.coleman_liau_index(text)
    stats['automated_readability_index'] = (
        textstat.automated_readability_index(text))
    stats['dale_chall_readability_score'] = (
        textstat.dale_chall_readability_score(text))
    stats['linsear_write_formula'] = textstat.linsear_write_formula(text)
    stats['gunning_fog'] = textstat.gunning_fog(text)
    stats['text_standard'] = textstat.text_standard(text, float_output=True)
    return stats
def score_comment(text):
    """Score a comment's informativeness, capped at 100.

    Points are awarded for ``{code`` markers, GitHub links and other
    ``https://`` links. Each kind is counted and then stripped from the
    text before the language analysis. The remaining text contributes
    via named entities, sentences, nouns and the textstat grade level.

    score = entities * 10 + sentences * 2.5 + nouns + code * 5
            + github links * 10 + https links * 5 + grade level
    """
    # Count code markers, then remove `{code:...}...{code}` spans.
    code_markers = text.count('{code')
    text = re.sub(r'{code:(.|\r|\n)*{code}', '', text)

    # Count GitHub links, then strip the part up to `/pull`.
    github_links = text.count('https://github.com')
    text = re.sub(r'https://github.com.*/pull', '', text)

    # Count the remaining https links, then drop every URL.
    https_links = text.count('https://')
    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)

    sentences = nltk.sent_tokenize(text)

    # Part-of-speech tags; any tag containing 'NN' counts as a noun.
    tagged_words = nltk.pos_tag(nltk.word_tokenize(text))
    noun_total = sum(1 for tagged in tagged_words if 'NN' in tagged[1])

    # Binary named-entity chunking labels every entity subtree 'NE'.
    entity_tree = nltk.chunk.ne_chunk(tagged_words, binary=True)
    named_entities = [
        subtree for subtree in entity_tree.subtrees()
        if subtree.label() == 'NE'
    ]

    grade_level = textstat.text_standard(text, float_output=True)

    total = (len(named_entities) * 10 + len(sentences) * 2.5 + noun_total
             + code_markers * 5 + github_links * 10 + https_links * 5
             + grade_level)

    # Extra code and links can push the total past the cap.
    return 100 if total > 100 else total
def analyze_vocab(text):
    """Return the word count and textstat readability metrics for ``text``.

    ``num_words`` is the textstat lexicon count and ``text_standard`` is
    requested as a float. Every other key names the textstat function
    that produced its value.
    """
    return dict(
        num_words=textstat.lexicon_count(text),
        flesch_reading_ease=textstat.flesch_reading_ease(text),
        smog_index=textstat.smog_index(text),
        flesch_kincaid_grade=textstat.flesch_kincaid_grade(text),
        coleman_liau_index=textstat.coleman_liau_index(text),
        automated_readability_index=textstat.automated_readability_index(text),
        dale_chall_readability_score=textstat.dale_chall_readability_score(text),
        difficult_words=textstat.difficult_words(text),
        linsear_write_formula=textstat.linsear_write_formula(text),
        gunning_fog=textstat.gunning_fog(text),
        text_standard=textstat.text_standard(text, float_output=True),
    )
def vocab_check(text):
    """Run the vocabulary and readability checks on ``text``.

    Returns a 3-tuple ``(vocab_results, diff_words, easy_word_dict)``.
    ``vocab_results`` maps metric names to scores, with the float text
    standard stored under ``'total_score'``. The other two values are the
    pair returned by ``difficult_words(text)``.
    """
    vocab_results = {}
    vocab_results['dale_chall_readability_score'] = (
        dale_chall_readability_score(text))
    vocab_results['smog_index'] = smog_index(text)
    vocab_results['gunning_fog'] = gunning_fog(text)
    vocab_results['flesch_reading_ease'] = flesch_reading_ease(text)
    vocab_results['flesch_kincaid_grade'] = flesch_kincaid_grade(text)
    vocab_results['linsear_write_formula'] = linsear_write_formula(text)
    vocab_results['coleman_liau_index'] = coleman_liau_index(text)
    vocab_results['automated_readability_index'] = (
        automated_readability_index(text))
    vocab_results['yule_vocab_richness'] = yule(text)
    vocab_results['total_score'] = text_standard(text, float_output=True)

    hard_words, easy_words = difficult_words(text)
    return (vocab_results, hard_words, easy_words)
def extract_lexical_features(Authors):
    '''
    Extract the readability and type-token-ratio features.

    Takes a dictionary of authors and returns the same dictionary after
    setting, on every author object, ``readability`` (mean textstat grade
    level over the raw tweets) and ``TTR`` (``ld.ttr`` of the author's
    ``clean`` text).
    '''
    # Raw text: average grade level of the tweets. Each tweet's score is
    # divided before it is added, so the running total is already the mean.
    for profile in Authors.values():
        profile.readability = 0
        tweet_total = len(profile.tweets)
        for tweet in profile.tweets:
            profile.readability += (
                textstat.text_standard(tweet, float_output=True) / tweet_total)

    # Cleaned text: type-token ratio as the lexical-diversity measure.
    for profile in Authors.values():
        profile.TTR = ld.ttr(profile.clean)

    return Authors
def get_readability_features(self):
    """Compute readability features of ``self.raw_text`` and store them on ``self``.

    The raw text is tokenized into sentences with ``text_tokenizer``
    (URL replacement enabled). Each sentence's tokens are joined with
    spaces and terminated by a newline, and the combined string is then
    scored with textstat.
    """
    sentence_tokens = text_tokenizer(self.raw_text,
                                     replace_url_flag=True,
                                     tokenize_sent_flag=True)
    normalized = ''.join(
        ' '.join(tokens) + '\n' for tokens in sentence_tokens)

    self.syllable_count = textstat.syllable_count(normalized)
    self.flesch_reading_ease = textstat.flesch_reading_ease(normalized)
    self.flesch_kincaid_grade = textstat.flesch_kincaid_grade(normalized)
    self.fog_scale = textstat.gunning_fog(normalized)
    self.smog = textstat.smog_index(normalized)
    self.automated_readability = textstat.automated_readability_index(
        normalized)
    self.coleman_liau = textstat.coleman_liau_index(normalized)
    self.linsear_write = textstat.linsear_write_formula(normalized)
    self.dale_chall_readability = textstat.dale_chall_readability_score(
        normalized)
    self.text_standard = textstat.text_standard(normalized)
def process_file(train_file, test_file, topic_model_file):
    """Re-derive features for the train and test CSV files, in place.

    For both files this drops the outdated readability and index columns,
    adds ``readability_standard`` (textstat text standard as a float) and
    ``sentiment`` (TextBlob polarity) from the ``text`` column, then
    appends one ``Topic #i`` column per topic score. The vectorizer is
    fitted on the train split and only applied to the test split. Both
    frames are written back to their original paths without the index.
    """
    frames = {
        'train': pd.read_csv(train_file),
        'test': pd.read_csv(test_file),
    }

    # Columns produced by earlier runs that must not survive.
    obsolete_columns = [
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'linsear_write_formula',
        'gunning_fog',
        'flesch_reading_ease',
        'Unnamed: 0',
        'Unnamed: 0.1',
        'Unnamed: 0.1.1',
    ]

    for split, loaded in list(frames.items()):
        trimmed = loaded.drop(columns=obsolete_columns)
        frames[split] = trimmed
        print('Removed old columns')

        # Features are computed from the frame as loaded; 'text' is not
        # among the dropped columns.
        trimmed['readability_standard'] = loaded['text'].apply(
            lambda row_text: textstat.text_standard(row_text, float_output=True))
        print('Added readability')

        trimmed['sentiment'] = loaded['text'].apply(
            lambda row_text: TextBlob(row_text).sentiment.polarity)
        print('Added sentiment')

    # Topic scores: fit on train, transform test.
    corpus, topic_model = load_topic_model(topic_model_file)
    vectorizer = TopicModelVectorizer(topic_model, corpus)
    topic_scores = {
        'train': vectorizer.fit_transform(frames['train']),
        'test': vectorizer.transform(frames['test']),
    }
    print('Fetched topic scores')

    for split in list(frames):
        split_scores = topic_scores[split]
        topic_columns = [
            f'Topic #{i}' for i in range(split_scores.shape[1])
        ]
        topic_frame = pd.DataFrame(data=split_scores,
                                   columns=topic_columns,
                                   index=frames[split].index)
        frames[split] = frames[split].merge(topic_frame,
                                            left_index=True,
                                            right_index=True)
    print('Added topic scores')

    frames['train'].to_csv(train_file, index=False)
    frames['test'].to_csv(test_file, index=False)
def score_text(self, test_data):
    """Return a dict of textstat readability metrics for ``test_data``.

    Each key names the textstat function that produced its value.
    """
    return {
        'flesch_reading_ease': textstat.flesch_reading_ease(test_data),
        'smog_index': textstat.smog_index(test_data),
        'flesch_kincaid_grade': textstat.flesch_kincaid_grade(test_data),
        'coleman_liau_index': textstat.coleman_liau_index(test_data),
        'automated_readability_index':
            textstat.automated_readability_index(test_data),
        'dale_chall_readability_score':
            textstat.dale_chall_readability_score(test_data),
        'difficult_words': textstat.difficult_words(test_data),
        'linsear_write_formula': textstat.linsear_write_formula(test_data),
        'gunning_fog': textstat.gunning_fog(test_data),
        'text_standard': textstat.text_standard(test_data),
    }
def _extract_readability_scores(self, text: Text, scores=None) -> Dict:
    """Compute the requested textstat readability scores for ``text``.

    Parameters
    ----------
    text:
        The text to analyse.
    scores:
        Optional container of metric names to compute. ``None`` (the
        default) computes every supported metric. Names that are not
        supported are ignored.

    Returns
    -------
    Dict
        Maps each computed metric name to its value. ``text_standard`` is
        returned as a float. The dict is empty when ``scores`` selects no
        supported metric.
    """
    supported_metrics = (
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'difficult_words',
        'linsear_write_formula',
        'gunning_fog',
        'text_standard',
    )
    # Identity test, not ``== None``: equality can be overloaded (array-like
    # selections compare element-wise) and would then misbehave.
    compute_all = scores is None

    output = {}
    for name in supported_metrics:
        if not (compute_all or name in scores):
            continue
        # Each metric name is also the textstat function that computes it.
        metric = getattr(textstat, name)
        if name == 'text_standard':
            output[name] = metric(text, float_output=True)
        else:
            output[name] = metric(text)
    return output
def text_analysis(test_data):
    """Print every textstat readability metric for ``test_data``.

    One ``<metric>: <value>`` line is printed per metric, in the order
    listed below. Nothing is returned.
    """
    metric_names = (
        # Higher scores indicate material that is easier to read; aim for > 60.0.
        'flesch_reading_ease',
        # US grade level.
        'smog_index',
        # US grade level.
        'flesch_kincaid_grade',
        # Coleman-Liau: US grade level.
        'coleman_liau_index',
        # US grade level.
        'automated_readability_index',
        # 0.1579 * (difficult words / words * 100) + 0.0496 * (words / sentences)
        'dale_chall_readability_score',
        # Number of difficult words.
        'difficult_words',
        # US grade level from sentence length and the number of words with
        # three or more syllables.
        'linsear_write_formula',
        # Gunning fog: readable by someone who left full-time education at
        # a later age than the index.
        'gunning_fog',
        # US grade level.
        'text_standard',
    )
    for name in metric_names:
        # Each label is also the name of the textstat function to call.
        print(name + ': ' + str(getattr(textstat, name)(test_data)))
def test_unicode_support():
    """Smoke test: ``text_standard`` must accept Japanese text without raising.

    The same phrase is supplied once as escape sequences and once as
    literal characters.
    """
    samples = (
        "\u3042\u308a\u304c\u3068\u3046\u3054\u3056\u3044\u307e\u3059",
        u"ありがとうございます",
    )
    for sample in samples:
        textstat.text_standard(sample)