def get_readibility(text, metric="flesch_kincaid_grade"):
    """
    Return a score which reveals a piece of text's readability level.

    Reference:
    https://chartbeat-labs.github.io/textacy/getting_started/quickstart.html
    https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests
    """
    # Each supported metric name matches the textstat function of the same name.
    supported_metrics = (
        "flesch_kincaid_grade",
        "flesch_reading_ease",
        "smog_index",
        "coleman_liau_index",
        "automated_readability_index",
        "dale_chall_readability_score",
        "difficult_words",
        "linsear_write_formula",
        "gunning_fog",
        "text_standard",
    )
    if metric not in supported_metrics:
        print("ERROR: Please select correct metric!")
        return None
    # Dispatch by name instead of a long elif chain.
    return getattr(textstat, metric)(text)
def get_stats(text):
    """Collect the full battery of textstat readability metrics for *text*
    plus simple length/quote counts, returned as a single dict."""
    stats = {
        name: getattr(textstat, name)(text)
        for name in (
            "flesch_reading_ease",
            "smog_index",
            "flesch_kincaid_grade",
            "coleman_liau_index",
            "automated_readability_index",
            "dale_chall_readability_score",
            "difficult_words",
            "linsear_write_formula",
            "gunning_fog",
        )
    }
    # text_standard is reported under a different key than its textstat name.
    stats["consolidated_score"] = textstat.text_standard(text)
    stats["doc_length"] = len(text)  # think about excluding spaces?
    stats["quote_count"] = text.count('"')
    return stats
def seven_test(processed_essay):
    """
    Compute eight readability scores for every script, using predefined
    formulas. Each returned list starts with a short header tag followed by
    one score per essay.

    :param processed_essay: iterable of essays (each converted via str())
    :return: flesch_score, gunning_index, kincaid_grade, liau_index,
             automated_readability_index, dale_readability_score,
             difficult_word, linsear_write
    """
    # Column headers and the textstat metric feeding each column, in order.
    headers = ("FS", "GI", "KG", "LI", "ARI", "DLS", "DW", "LW")
    metric_names = (
        "flesch_reading_ease",
        "gunning_fog",
        "flesch_kincaid_grade",
        "coleman_liau_index",
        "automated_readability_index",
        "dale_chall_readability_score",
        "difficult_words",
        "linsear_write_formula",
    )
    columns = [[tag] for tag in headers]
    for essay in processed_essay:
        essay_text = str(essay)
        for column, name in zip(columns, metric_names):
            column.append(getattr(textstat, name)(essay_text))
    return tuple(columns)
def get_readability_score(text, metric="flesch"):
    """Return a readability score for *text* using the chosen *metric*.

    Supported metrics: flesch, smog, coleman_liau_index,
    automated_readability_index, dale_chall_readability_score,
    difficult_words, linsear_write_formula, gunning_fog, avg_word_length.
    Returns None for an unrecognised metric.
    """
    # NOTE: the original declared `global tknzr, DIFFICULT`; both were no-ops
    # (read-only access needs no global, and DIFFICULT was never used), so the
    # statement is removed.
    # Normalise curly apostrophes so the tokenizer sees plain ASCII quotes.
    text = text.replace("’", "'")
    # https://pypi.org/project/textstat/
    if metric == "flesch":
        return textstat.flesch_reading_ease(text)
    elif metric == "smog":
        return textstat.smog_index(text)
    elif metric == "coleman_liau_index":
        return textstat.coleman_liau_index(text)
    elif metric == "automated_readability_index":
        return textstat.automated_readability_index(text)
    elif metric == "dale_chall_readability_score":
        return textstat.dale_chall_readability_score(text)
    elif metric == "difficult_words":
        # Custom difficulty: percentage of distinct tokens that are long
        # (>= 6 chars) and not in the easy-word list.
        nb_difficult = 0
        nb_easy = 0
        for w in set(tknzr.tokenize(text.lower())):
            if w not in EASY_WORDS and len(w) >= 6:
                nb_difficult += 1
            else:
                nb_easy += 1
        total = nb_difficult + nb_easy
        if total == 0:
            # No tokens at all: avoid ZeroDivisionError; nothing is difficult.
            return 0
        return 100 * nb_difficult / total
    elif metric == "linsear_write_formula":
        return textstat.linsear_write_formula(text)
    elif metric == "gunning_fog":
        return textstat.gunning_fog(text)
    elif metric == "avg_word_length":
        words = tknzr.tokenize(text)
        words = [w for w in words if w not in misc_utils.PUNCT]
        if len(words) == 0:
            return 0
        return np.average([len(w) for w in words])
    # Unrecognised metric: be explicit instead of falling off the end.
    return None
def analyze():
    """Flask endpoint: run the full textstat battery over the request body
    and return the report as a decorated JSON response."""
    print(request)
    text = request.data.decode("utf-8").strip()
    # Report keys are the textstat function names with hyphens instead of
    # underscores.
    report = {
        name.replace("_", "-"): getattr(textstat, name)(text)
        for name in (
            "flesch_reading_ease",
            "smog_index",
            "flesch_kincaid_grade",
            "coleman_liau_index",
            "automated_readability_index",
            "dale_chall_readability_score",
            "difficult_words",
            "linsear_write_formula",
            "gunning_fog",
            "text_standard",
        )
    }
    return decorate_response(jsonify(report))
def readability(queries):
    """Compute per-line readability metrics for an iterable of text lines.

    :param queries: iterable of strings, one text sample per element
    :return: dict mapping metric name -> list of scores, one per line

    Fix: the original built a pandas DataFrame that was immediately
    overwritten by the dict literal (dead work); it is removed, along with
    commented-out experiment code.
    """
    scores = {
        'Flesch': [],
        'Smog': [],
        'Flesch grade': [],
        'Coleman': [],
        'Automated': [],
        'Dale': [],
        'Difficult': [],
        'Linsear': [],
        'Gunning': [],
        'Text Standard': []
    }
    for line in queries:
        scores['Flesch'].append(textstat.flesch_reading_ease(line))
        scores['Smog'].append(textstat.smog_index(line))
        scores['Flesch grade'].append(textstat.flesch_kincaid_grade(line))
        scores['Coleman'].append(textstat.coleman_liau_index(line))
        scores['Automated'].append(textstat.automated_readability_index(line))
        scores['Dale'].append(textstat.dale_chall_readability_score(line))
        scores['Difficult'].append(textstat.difficult_words(line))
        scores['Linsear'].append(textstat.linsear_write_formula(line))
        scores['Gunning'].append(textstat.gunning_fog(line))
        # float_output=True gives a numeric consensus grade, not a label.
        scores['Text Standard'].append(
            textstat.text_standard(line, float_output=True))
    return scores
def getReadabilityMetrics(test_data):
    '''
    For a given article IN TEXT FORMAT, returns its readability metrics.
    Uses the textstat library, please install it.
    '''
    # Every reported key matches the textstat function of the same name.
    metric_names = (
        "flesch_reading_ease",
        "smog_index",
        "flesch_kincaid_grade",
        "coleman_liau_index",
        "automated_readability_index",
        "dale_chall_readability_score",
        "difficult_words",
        "linsear_write_formula",
        "gunning_fog",
        "text_standard",
    )
    return {name: getattr(textstat, name)(test_data) for name in metric_names}
def textstat_stats(text):
    """Return a pandas Series of readability grades plus syllable and lexicon
    counts scaled by the whitespace word count of *text*."""
    word_total = len(text.split())
    # smog_index and text_standard are intentionally omitted: SMOG is only
    # useful on 30+ sentences.
    values = {
        'flesch_ease': ts.flesch_reading_ease(text),        # Flesch Reading Ease
        'flesch_grade': ts.flesch_kincaid_grade(text),      # Flesch-Kincaid grade
        'gfog': ts.gunning_fog(text),                       # FOG index (grade level)
        'auto_readability': ts.automated_readability_index(text),
        'cl_index': ts.coleman_liau_index(text),            # Coleman-Liau formula
        'lw_formula': ts.linsear_write_formula(text),       # Linsear Write grade
        'dcr_score': ts.dale_chall_readability_score(text), # Dale-Chall word list
        'syll_count': ts.syllable_count(text, lang='en_US') / word_total,
        'lex_count': ts.lexicon_count(text, removepunct=True) / word_total,
    }
    return pd.Series(values)
def compute_readability_stats(text):
    """
    Compute reading statistics of the given text
    Reference: https://github.com/shivam5992/textstat

    Parameters
    ==========
    text: str, input section or abstract text

    Returns
    =======
    dict mapping metric name -> score; if textstat fails on the input,
    every value is None (stable shape for downstream code).
    """
    keys = (
        'flesch_reading_ease', 'smog', 'flesch_kincaid_grade',
        'coleman_liau_index', 'automated_readability_index', 'dale_chall',
        'difficult_words', 'linsear_write', 'gunning_fog', 'text_standard',
        'n_syllable', 'avg_letter_per_word', 'avg_sentence_length',
    )
    try:
        readability_dict = {
            'flesch_reading_ease': textstat.flesch_reading_ease(text),
            'smog': textstat.smog_index(text),
            'flesch_kincaid_grade': textstat.flesch_kincaid_grade(text),
            'coleman_liau_index': textstat.coleman_liau_index(text),
            'automated_readability_index': textstat.automated_readability_index(text),
            'dale_chall': textstat.dale_chall_readability_score(text),
            'difficult_words': textstat.difficult_words(text),
            'linsear_write': textstat.linsear_write_formula(text),
            'gunning_fog': textstat.gunning_fog(text),
            'text_standard': textstat.text_standard(text),
            'n_syllable': textstat.syllable_count(text),
            'avg_letter_per_word': textstat.avg_letter_per_word(text),
            'avg_sentence_length': textstat.avg_sentence_length(text)
        }
    except Exception:
        # Was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt; narrowed. Fall back to all-None values.
        readability_dict = dict.fromkeys(keys)
    return readability_dict
def score(self, strText):
    """Compute all textstat metrics for *strText*, storing each raw score and
    a human-readable label (str_*) on the instance."""
    # Metrics whose label comes straight from self.grade().
    self.automated_readability_index = textstat.automated_readability_index(
        strText)
    self.str_automated_readability_index = self.grade(
        self.automated_readability_index)
    self.coleman_liau_index = textstat.coleman_liau_index(strText)
    self.str_coleman_liau_index = self.grade(self.coleman_liau_index)

    # Dale-Chall uses its own published score -> grade-band mapping.
    self.dale_chall_readability_score = textstat.dale_chall_readability_score(
        strText)
    dale_bands = (
        (9.0, '13th to 15th grade (college)'),
        (8.0, '11th to 12th grade'),
        (7.0, '9th to 10th grade'),
        (6.0, '7th to 8th grade'),
        (5.0, '5th to 6th grade'),
    )
    for cutoff, label in dale_bands:
        if self.dale_chall_readability_score >= cutoff:
            break
    else:
        label = '4th grade or lower'
    self.str_dale_chall_readability_score = ' | ' + label

    self.difficult_words = textstat.difficult_words(strText)
    self.flesch_kincaid_grade = textstat.flesch_kincaid_grade(strText)
    self.str_flesch_kincaid_grade = self.grade(self.flesch_kincaid_grade)

    # Flesch Reading Ease: higher score means easier text.
    self.flesch_reading_ease = textstat.flesch_reading_ease(strText)
    ease_bands = (
        (90, 'Very Easy'),
        (80, 'Easy'),
        (70, 'Fairly Easy'),
        (60, 'Standard'),
        (50, 'Fairly Difficult'),
        (30, 'Difficult'),
    )
    for cutoff, label in ease_bands:
        if self.flesch_reading_ease >= cutoff:
            break
    else:
        label = 'Very Confusing'
    self.str_flesch_reading_ease = ' | ' + label

    self.gunning_fog = textstat.gunning_fog(strText)
    self.str_gunning_fog = self.grade(self.gunning_fog)
    self.linsear_write_formula = textstat.linsear_write_formula(strText)
    self.str_linsear_write_formula = self.grade(self.linsear_write_formula)
    self.smog_index = textstat.smog_index(strText)
    self.str_smog_index = self.grade(self.smog_index)
    self.text_standard = textstat.text_standard(strText)
def process(self, df):
    """Add readability and lexical feature columns to *df* (computed from
    its 'articleBody' column) and pickle the feature matrix to
    ../saved_data/read.pkl. Returns 1 on completion."""
    t0 = time()
    print("\n---Generating Readability Features:---\n")

    def lexical_diversity(text):
        # Ratio of vocabulary size to total token count.
        tokens = nltk.tokenize.word_tokenize(text.lower())
        word_count = len(tokens)
        vocab_size = len(set(tokens))
        return vocab_size / word_count

    def get_counts(text, word_list):
        # Number of tokens in *text* that appear in *word_list*.
        tokens = nltk.tokenize.word_tokenize(text.lower())
        return sum(1 for tok in tokens if tok in word_list)

    # One DataFrame column per textstat metric of the same name.
    textstat_cols = (
        'flesch_reading_ease', 'smog_index', 'flesch_kincaid_grade',
        'coleman_liau_index', 'automated_readability_index',
        'dale_chall_readability_score', 'difficult_words',
        'linsear_write_formula', 'gunning_fog',
    )
    for col in textstat_cols:
        df[col] = df['articleBody'].map(getattr(textstat, col))

    df['i_me_myself'] = df['articleBody'].apply(get_counts,
                                               args=(['i', 'me', 'myself'],))
    df['punct'] = df['articleBody'].apply(get_counts,
                                          args=([',', '.', '!', '?'],))
    df['lexical_diversity'] = df['articleBody'].apply(lexical_diversity)

    feats = [*textstat_cols, 'i_me_myself', 'punct', 'lexical_diversity']
    feature_matrix = df[feats].values
    with open('../saved_data/read.pkl', 'wb') as outfile:
        pickle.dump(feats, outfile, -1)
        pickle.dump(feature_matrix, outfile, -1)
    print('readable features saved in read.pkl')
    print('\n---Readability Features is complete---')
    print("Time taken {} seconds\n".format(time() - t0))
    return 1
def readability_scores(self, text):
    """Compute every supported textstat metric for *text* and store each one
    as an attribute on the instance."""
    # (attribute name, textstat function name) pairs; most match exactly,
    # but 'ari' abbreviates automated_readability_index.
    attr_to_metric = (
        ('ari', 'automated_readability_index'),
        ('flesch_kincaid_grade', 'flesch_kincaid_grade'),
        ('coleman_liau_index', 'coleman_liau_index'),
        ('dale_chall_readability_score', 'dale_chall_readability_score'),
        ('flesch_reading_ease', 'flesch_reading_ease'),
        ('gunning_fog', 'gunning_fog'),
        ('linsear_write_formula', 'linsear_write_formula'),
        ('lix', 'lix'),
        ('rix', 'rix'),
        ('smog_index', 'smog_index'),
        ('text_standard', 'text_standard'),
    )
    for attr, metric in attr_to_metric:
        setattr(self, attr, getattr(textstat, metric)(text))
def get_readability_stats(text):
    """Return a dict mapping readability metric names to their scores for
    *text* (note: no difficult_words entry in this variant)."""
    stats = {
        name: getattr(textstat, name)(text)
        for name in (
            'flesch_reading_ease',
            'smog_index',
            'flesch_kincaid_grade',
            'coleman_liau_index',
            'automated_readability_index',
            'dale_chall_readability_score',
            'linsear_write_formula',
            'gunning_fog',
        )
    }
    # Consensus grade as a number rather than an "x-th grade" string.
    stats['text_standard'] = textstat.text_standard(text, float_output=True)
    return stats
def vocab_check(text):
    """Assemble readability scores for *text* plus the difficult-word count
    and easy-word dict from the local difficult_words() helper."""
    # (result key, scoring function) pairs; functions are module-level names.
    metric_funcs = (
        ('dale_chall_readability_score', dale_chall_readability_score),
        ('smog_index', smog_index),
        ('gunning_fog', gunning_fog),
        ('flesch_reading_ease', flesch_reading_ease),
        ('flesch_kincaid_grade', flesch_kincaid_grade),
        ('linsear_write_formula', linsear_write_formula),
        ('coleman_liau_index', coleman_liau_index),
        ('automated_readability_index', automated_readability_index),
        ('yule_vocab_richness', yule),
    )
    # Construct dictionary of scores.
    vocab_results = {name: func(text) for name, func in metric_funcs}
    vocab_results['total_score'] = text_standard(text, float_output=True)
    # difficult_words() here returns (count, easy-word dict), unlike the
    # plain textstat function of the same name.
    diff_words, easy_word_dict = difficult_words(text)
    return (vocab_results, diff_words, easy_word_dict)
def analyze_vocab(text):
    """Return the word count plus the full set of textstat readability
    metrics for *text* as a dict."""
    metric_names = (
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'difficult_words',
        'linsear_write_formula',
        'gunning_fog',
    )
    result = {'num_words': textstat.lexicon_count(text)}
    result.update((name, getattr(textstat, name)(text)) for name in metric_names)
    # Numeric consensus grade rather than a label string.
    result['text_standard'] = textstat.text_standard(text, float_output=True)
    return result
def lisibilty(text):
    """Return a list of lisibility (readability) features for *text*."""
    # Convert once instead of per call.
    s = str(text)
    # NOTE(review): lang='en_arabic' for syllable_count looks unusual for an
    # otherwise-English feature set — confirm it is intended.
    return [
        textstat.syllable_count(s, lang='en_arabic'),
        textstat.lexicon_count(s, removepunct=True),
        textstat.sentence_count(s),
        textstat.flesch_reading_ease(s),
        textstat.flesch_kincaid_grade(s),
        textstat.gunning_fog(s),
        textstat.smog_index(s),
        textstat.automated_readability_index(s),
        textstat.coleman_liau_index(s),
        textstat.linsear_write_formula(s),
        textstat.dale_chall_readability_score(s),
    ]
def textstat_stats(text):
    """Compute eight textstat readability measures for *text* and return them
    as a pandas Series indexed by short metric tags."""
    measures = {
        'difficulty': textstat.flesch_reading_ease(text),
        'grade_difficulty': textstat.flesch_kincaid_grade(text),
        'gfog': textstat.gunning_fog(text),
        'smog': textstat.smog_index(text),
        'ari': textstat.automated_readability_index(text),
        'cli': textstat.coleman_liau_index(text),
        'lwf': textstat.linsear_write_formula(text),
        'dcrs': textstat.dale_chall_readability_score(text),
    }
    return pd.Series(measures)
def get_readability_features(self):
    """Sentence-tokenize self.raw_text (URLs replaced) and store one textstat
    readability feature per attribute."""
    sent_tokens = text_tokenizer(self.raw_text,
                                 replace_url_flag=True,
                                 tokenize_sent_flag=True)
    # One sentence per line: words space-joined, each line newline-terminated.
    sentences = ''.join(' '.join(sent) + '\n' for sent in sent_tokens)
    # (attribute name, textstat function) pairs computed over the rebuilt text.
    feature_map = (
        ('syllable_count', textstat.syllable_count),
        ('flesch_reading_ease', textstat.flesch_reading_ease),
        ('flesch_kincaid_grade', textstat.flesch_kincaid_grade),
        ('fog_scale', textstat.gunning_fog),
        ('smog', textstat.smog_index),
        ('automated_readability', textstat.automated_readability_index),
        ('coleman_liau', textstat.coleman_liau_index),
        ('linsear_write', textstat.linsear_write_formula),
        ('dale_chall_readability', textstat.dale_chall_readability_score),
        ('text_standard', textstat.text_standard),
    )
    for attr, func in feature_map:
        setattr(self, attr, func(sentences))
def score_text(self, test_data):
    """Return a dict of textstat readability scores for *test_data*; each key
    matches the textstat function of the same name."""
    metric_names = (
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'difficult_words',
        'linsear_write_formula',
        'gunning_fog',
        'text_standard',
    )
    return {name: getattr(textstat, name)(test_data) for name in metric_names}
def _extract_readability_scores(self, text: Text, scores=None) -> Dict: output = {} if scores == None or 'flesch_reading_ease' in scores: output['flesch_reading_ease'] = textstat.flesch_reading_ease(text) if scores == None or 'smog_index' in scores: output['smog_index'] = textstat.smog_index(text) if scores == None or 'flesch_kincaid_grade' in scores: output['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade( text) if scores == None or 'coleman_liau_index' in scores: output['coleman_liau_index'] = textstat.coleman_liau_index(text) if scores == None or 'automated_readability_index' in scores: output[ 'automated_readability_index'] = textstat.automated_readability_index( text) if scores == None or 'dale_chall_readability_score' in scores: output[ 'dale_chall_readability_score'] = textstat.dale_chall_readability_score( text) if scores == None or 'difficult_words' in scores: output['difficult_words'] = textstat.difficult_words(text) if scores == None or 'linsear_write_formula' in scores: output['linsear_write_formula'] = textstat.linsear_write_formula( text) if scores == None or 'gunning_fog' in scores: output['gunning_fog'] = textstat.gunning_fog(text) if scores == None or 'text_standard' in scores: output['text_standard'] = textstat.text_standard(text, float_output=True) return output
def text_analysis(test_data):
    """Print every textstat readability metric for *test_data*, one per line
    as 'metric_name: value'."""
    # Metric notes:
    # - flesch_reading_ease: higher scores are easier to read; aim for > 60.0.
    # - smog_index, flesch_kincaid_grade, coleman_liau_index,
    #   automated_readability_index, text_standard: US grade level.
    # - dale_chall_readability_score:
    #   0.1579*(difficult words / words * 100) + 0.0496*(words / sentences).
    # - difficult_words: number of difficult words.
    # - linsear_write_formula: US grade level from sentence length and the
    #   number of words with three or more syllables.
    # - gunning_fog: the text can be understood by someone who left full-time
    #   education at a later age than the index.
    for name in (
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'difficult_words',
        'linsear_write_formula',
        'gunning_fog',
        'text_standard',
    ):
        print(name + ': ' + str(getattr(textstat, name)(test_data)))
def print_readability(text_to_analyse, option='short'):
    """Print annotated textstat readability metrics for *text_to_analyse*.

    Only option='all' produces output; any other value (including the
    default 'short') prints nothing.
    """
    if option != 'all':
        return
    # (explanatory label, scoring function) pairs, printed in order.
    labelled_metrics = (
        ("flesch (0-29: confusing, 30-59: Difficult, 60-69: Standard, 70-100: Easy): ",
         textstat.flesch_reading_ease),
        ("smog (years of education required): ", textstat.smog_index),
        ("flesch kinkaid (70-100: Fairly Easy; 60-70: Plain English; 30-60: Fairly Difficult; 30-0: Very Difficult): ",
         textstat.flesch_kincaid_grade),
        ("coleman liau: ", textstat.coleman_liau_index),
        ("auto read (1-4: 5-10 years age; 5-8: 10-14 y; 9-12: 14-18 y; 13-14: 18+): ",
         textstat.automated_readability_index),
        ("dale chall (< 5: kid; 5-8: scholar; 9-10: college): ",
         textstat.dale_chall_readability_score),
        ("difficult words: ", textstat.difficult_words),
        ("linsear write: ", textstat.linsear_write_formula),
        ("gunning fog (9-12: High-school; 13-17: College): ",
         textstat.gunning_fog),
        ("text standard (estimated school grade level): ",
         textstat.text_standard),
    )
    for label, metric in labelled_metrics:
        print(label, metric(text_to_analyse))
smog_index = textstat.smog_index(raw) worksheet.update("H" + row, smog_index) # Automated Readability Index # https://en.wikipedia.org/wiki/Automated_readability_index automated_readability_index = textstat.automated_readability_index(raw) worksheet.update("I" + row, automated_readability_index) # The Coleman-Liau Index # https://en.wikipedia.org/wiki/Coleman%E2%80%93Liau_index coleman_liau_index = textstat.coleman_liau_index(raw) worksheet.update("J" + row, coleman_liau_index) # Linsear Write Formula # https://en.wikipedia.org/wiki/Linsear_Write linsear_write_formula = textstat.linsear_write_formula(raw) worksheet.update("K" + row, linsear_write_formula) # Dale-Chall Readability Score # < 4.9 - average 4th-grade student | 5.0–5.9 - average 5th or 6th-grade # 6.0–6.9 - average 7th or 8th-grade | 7.0–7.9 - average 9th or 10th-grade # 8.0–8.9 average 11th or 12th-grade | 9.0–9.9 - college student dale_chall_readability_score = textstat.dale_chall_readability_score( raw) worksheet.update("L" + row, dale_chall_readability_score) # Readability Consensus based upon all the above tests # Estimated school grade level required to understand the text text_standard = textstat.text_standard(raw, float_output=False) worksheet.update("M" + row, text_standard)
def test_linsear_write_formula():
    # With US English rules, Linsear Write should grade the long fixture 14.5.
    textstat.set_lang("en_US")
    score = textstat.linsear_write_formula(long_test)
    assert score == 14.5
def test_linsear_write_formula():
    # Expected Linsear Write grade for the shared long_test fixture.
    assert textstat.linsear_write_formula(long_test) == 14.5
def main(dir: str):
    """Parse every .eml file in *dir*, extract authentication, readability,
    grammar and sentiment features, and write words.txt, analysis.json and
    analysis.csv back into the same directory.

    NOTE(review): relies on names not defined in this function
    (remove_noise, pdf_to_text, flatten_json, save_body) — presumably
    module-level helpers/flags; confirm. Paths are built with '\\', so this
    looks Windows-specific — confirm.
    """
    checker = language_tool_python.LanguageTool('en-US')
    emails = {}  # filename -> extracted feature dict
    totalWords = ''  # running concatenation of every parsed body
    filenames = [
        filename for filename in os.listdir(dir) if filename.endswith('.eml')
    ]
    for filename in filenames:
        print()
        print('[INFO] Processing {}...'.format(filename))
        # latin1 maps every byte, so decoding itself cannot raise here.
        with open(os.path.join(dir, filename), 'r', encoding='latin1') as file:
            try:
                mail = mailparser.parse_from_file_obj(file)
            except Exception as e:
                print('[WARNING] Error while parsing: {}'.format(e))
                continue
        # filter duplicates based on subject
        #if mail.subject in emails:
        #    print('[WARNING] This email seems to be a duplicate of "{}"! Skipping...'
        #          .format(emails[mail.subject]['filename']))
        #    continue
        # don't process if auth results missing
        # if 'Authentication-Results' not in mail.headers:
        #     print('[WARNING] This email is missing an authentication results header! Skipping...')
        #     continue
        attachments = ''  # text recovered from all attachments
        # Strip angle brackets so the filename is safe to write to disk.
        for attachment in mail.attachments:
            attachment['filename'] = re.sub(r'<|>', '', attachment['filename'])
        try:
            mail.write_attachments(dir)
            for attachment in mail.attachments:
                if re.search('image', attachment['mail_content_type']):
                    # OCR image attachments; GIFs need frame extraction first.
                    if re.search('gif', attachment['mail_content_type']):
                        images, _, _ = gif2numpy.convert(
                            dir + '\\' + attachment['filename'])
                        img = images[0]
                    else:
                        img = cv2.imread(dir + '\\' + attachment['filename'])
                    # Upscale slightly before OCR.
                    img = cv2.resize(img,
                                     None,
                                     fx=1.2,
                                     fy=1.2,
                                     interpolation=cv2.INTER_CUBIC)
                    text = pytesseract.image_to_string(img)
                    attachments += text
                elif re.search('pdf', attachment['mail_content_type']):
                    # Detect the extracted PDF text's encoding before decoding.
                    encoding = chardet.detect(
                        pdf_to_text(dir + '\\' +
                                    attachment['filename']))['encoding']
                    attachments += pdf_to_text(
                        dir + '\\' + attachment['filename']).decode(encoding)
                # elif re.search('text', attachment['mail_content_type']):
                #     #print(chardet.detect((attachment['payload']).encode()))
                #     #encoding = chardet.detect(base64.b64decode(attachment['payload']).encode())['encoding']
                #     #attachments += base64.b64decode(attachment['payload']).decode(encoding)
                #     #print(codecs.encode(base64.b64decode(attachment['payload']), encoding=attachment['content_transfer_encoding']))
                #     attachments += attachment['payload']
                else:
                    # Fall back to the raw payload text.
                    attachments += attachment['payload']
                os.remove(dir + '\\' + attachment['filename'])
        except Exception as e:
            print(
                '[WARNING] Error while parsing attachments: {}'.format(e))
            # Best-effort cleanup of attachments already written to disk.
            # NOTE(review): this re-removes files the loop may already have
            # deleted — could itself raise; confirm intended.
            [
                os.remove(dir + '\\' + attachment['filename'])
                for attachment in mail.attachments
            ]
        # Analysis text = subject + visible HTML body text + attachment text.
        body = mail.subject + ' ' + \
            remove_noise(BeautifulSoup(mail.body, 'lxml').get_text(separator=' ', strip=True) +
                         BeautifulSoup(attachments, 'lxml').get_text())
        blob = TextBlob(body)
        totalWords = totalWords + " " + body.lower()
        grammarErrors = checker.check(body)
        # Pull SPF / DKIM / DMARC verdicts from the auth header, if present.
        if 'Authentication-Results' in mail.headers:
            spf = re.findall('spf=(\S*)',
                             mail.headers['Authentication-Results'])
            dkim = re.findall('dkim=(\S*)',
                              mail.headers['Authentication-Results'])
            dmarc = re.findall('dmarc=(\S*)',
                               mail.headers['Authentication-Results'])
        else:
            spf = dkim = dmarc = ''
        emails[filename] = {
            'filename': filename,
            # 'hops': mail.received[-1]['hop'],
            # 'totalDelay': sum([hop['delay']/60 for hop in mail.received]),
            'spf': spf[0] if len(spf) else None,
            'dkim': dkim[0] if len(dkim) else None,
            'dmarc': dmarc[0] if len(dmarc) else None,
            'subject': mail.subject,
            'from': mail.from_[0][1],
            'to': [tup[1] for tup in mail.to],
            'replyTo': [tup[1] for tup in mail.reply_to],
            'attachments': [x['filename'] for x in mail.attachments],
            'grammarErrors': len(grammarErrors),
            'counts': {
                'characterCount': len(body),
                'wordCount': textstat.lexicon_count(body),
                'sentenceCount': textstat.sentence_count(body)
            },
            'readability': {
                'flesch_kincaid': textstat.flesch_kincaid_grade(body),
                'gunning_fog': textstat.gunning_fog(body),
                'smog_index': textstat.smog_index(body),
                'automated_readability_index':
                textstat.automated_readability_index(body),
                'coleman_liau_index': textstat.coleman_liau_index(body),
                'linsear_write': textstat.linsear_write_formula(body),
            },
            'sentiment': {
                'polarity': blob.sentiment.polarity,
                'subjectivity': blob.sentiment.subjectivity
            }
        }
        if save_body:
            emails[filename]['body'] = body

    ## quit if nothing found ##
    # if not emails:
    #     print('[WARNING] No files were found in "{}"!'.format(dir))
    #     return

    ## writing all words to file ##
    with open(os.path.join(dir, 'words.txt'), 'w', encoding='utf-8') as file:
        file.write(totalWords.lower())

    ## output json ##
    with open(os.path.join(dir, 'analysis.json'), 'w') as jsonFile:
        json.dump(emails, jsonFile, indent=2)

    ## build and output csv ##
    # generate and output headers using first email
    # NOTE(review): raises IndexError when no email parsed successfully.
    column_headers = list(flatten_json(emails[list(emails.keys())[0]]).keys())
    csvFile = open(os.path.join(dir, 'analysis.csv'), 'w', encoding='utf-8')
    csvFile.write(',{}\n'.format(','.join(column_headers)))
    # generate and output one line per email
    for email in emails.keys():
        # flatten json to 1 layer deep
        flattened_email = flatten_json(emails[email])
        # generate the values for this row
        csv_values = [
            '"' + str(flattened_email[column_header]) + '"'
            for column_header in column_headers
        ]
        # add email name and join w/ commas, then write out
        csvFile.write('{},{}\n'.format('"' + email + '"',
                                       ','.join(csv_values)))
    csvFile.close()
    # print out stats
    print('{}/{} processed. The remaining failed for some reason.'.format(
        len(emails), len(filenames)))
def test_linsear_write_formula(self):
    # Expected Linsear Write grade for the class's long_test fixture.
    score = textstat.linsear_write_formula(self.long_test)
    self.assertEqual(score, 14.5)
if w[0].isupper(): cnt += 1 capital_count.append(cnt / len(cap_words)) #obatining readability features reviews[i] = reviews[i].strip().lower().replace("\'", '') kingrade.append(textstat.flesch_kincaid_grade(reviews[i])) gunning.append(textstat.gunning_fog(reviews[i])) flesch_reading_ease1.append(textstat.flesch_reading_ease(reviews[i])) difficult_words1.append(textstat.difficult_words(reviews[i])) smog_index1.append(textstat.smog_index(reviews[i])) automated_readability_index1.append( textstat.automated_readability_index(reviews[i])) coleman_liau_index1.append(textstat.coleman_liau_index(reviews[i])) linsear_write_formula1.append( textstat.linsear_write_formula(reviews[i])) dale_chall_readability_score1.append( textstat.dale_chall_readability_score(reviews[i])) word_freq = [] #obtaining punctuation count words = word_tokenize(reviews[i]) punct = [w for w in words if w in ['.', ',', ';', '?', ':', '!']] punct_count.append(len(punct) / len(words)) #obtaining stopwords frequency word = [ w for w in words if w not in ['.', ',', ';', '?', ':', '!', '"', "'", '#'] ] corpus.append(reviews[i])
def get_redability_assessments(data_text: str) -> Optional[dict]:
    """Build a complete readability report for *data_text*.

    The report bundles readability grade levels, readability scores, raw text
    statistics, reading/speaking timings, part-of-speech composition,
    readability/style/density issue annotations, and the flat list of offset
    matches used for inline highlighting.

    Fix over previous revision: the FORCAST grade was written to a misspelled
    key (``forcastGradeLevel``), leaving ``forecastGradeLevel`` at 0 and
    appending a stray key; it is now stored under the correct name.

    :param data_text: plain-text document to analyse.
    :return: dict with one entry per report section (see return statement).
    """
    divided_text = tokenize.sent_tokenize(data_text)
    word_tokenizes = nltk.word_tokenize(data_text)
    pos_tags = nltk.pos_tag(word_tokenizes)
    pos_tags_tagger = TAGGER.tag(word_tokenizes)
    f_dist = nltk.FreqDist(word_tokenizes)
    uniqueWordCount = compute_unique_word_count(f_dist.most_common())
    # Splitting on '\n' always yields at least as many parts as '\r\n';
    # max() keeps the larger paragraph estimate for mixed line endings.
    paragraphCount = max(len(data_text.split('\n')), len(data_text.split('\r\n')))
    counts = Counter(tag for word, tag in pos_tags)

    # Readability Grade Levels
    readability_grade_levels = dict(
        fleschKincaid=textstat.flesch_kincaid_grade(data_text),
        gunningFog=textstat.gunning_fog(data_text),
        colemanLiau=textstat.coleman_liau_index(data_text),
        smog=textstat.smog_index(data_text),
        ari=textstat.automated_readability_index(data_text),
        # FORCAST-style approximation -- need to check
        forecastGradeLevel=round(20 - (textstat.avg_syllables_per_word(data_text) / 10), 2),
        powersSumnerKearlGrade=round(textstat.avg_sentence_length(data_text) +
                                     textstat.avg_syllables_per_word(data_text) + 2.7971, 2),
        rix=textstat.rix(data_text),  # need to check
        raygorReadability=count_raygor_readability(divided_text),
        fryReadability=count_fry_readability(divided_text),  # need to check
        flesch=textstat.flesch_reading_ease(data_text),
    )

    # Readability Scores
    readability_scores = dict(
        readableRating=count_average_grade_levels(readability_grade_levels),
        fleschReadingEase=textstat.flesch_reading_ease(data_text),
        cefrLevel=count_cefr_levels(readability_grade_levels),
        ieltsLevel=count_ielts_levels(readability_grade_levels),
        spacheScore=round(textstat.spache_readability(data_text), 2),
        newDaleChallScore=textstat.dale_chall_readability_score_v2(data_text),
        lixReadability=textstat.lix(data_text),
        lensearWrite=textstat.linsear_write_formula(data_text),
    )

    # Text Statistics
    text_statistics = dict(
        characterCount=textstat.char_count(data_text),
        syllableCount=textstat.syllable_count(data_text),
        wordCount=textstat.lexicon_count(data_text),
        uniqueWordCount=uniqueWordCount,
        sentenceCount=textstat.sentence_count(data_text),
        paragraphCount=paragraphCount,
    )

    # Timings (both derived from the lexicon word count)
    timings_statistics = dict(
        readingTime=reading_time(textstat.lexicon_count(data_text)),
        speakingTime=speaking_time(textstat.lexicon_count(data_text)),
    )

    # Text Composition, grouped from Penn Treebank POS tags
    text_composition = dict(
        adjectives=counts.get('JJ', 0) + counts.get('JJR', 0) + counts.get('JJS', 0),
        adverbs=counts.get('RB', 0) + counts.get('RBR', 0) + counts.get('RBS', 0),
        conjunctions=counts.get('CC', 0),
        determiners=counts.get('DT', 0) + counts.get('PDT', 0) + counts.get('WDT', 0),
        interjections=counts.get('UH', 0),
        nouns=counts.get('NN', 0) + counts.get('NNS', 0),
        verbs=counts.get('VB', 0) + counts.get('VBD', 0) + counts.get('VBG', 0) +
              counts.get('VBN', 0) + counts.get('VBP', 0) + counts.get('VBZ', 0),
        properNouns=counts.get('NNP', 0) + counts.get('NNPS', 0),
        prepositions=counts.get('IN', 0),
        pronouns=counts.get('PRP', 0) + counts.get('PRP$', 0) +
                 counts.get('WP', 0) + counts.get('WP$', 0),
        qualifiers=counts.get('RB', 0),
        unrecognised=counts.get(None, 0),
        nonWords=counts.get('.', 0) + counts.get(',', 0) + counts.get(':', 0),
    )

    # Readability Issues
    sentences_30_syllables, sentences_30_count, sentences_20_syllables, sentences_20_count = \
        count_sentences_syllables(divided_text)
    sentences_30_syllables = find_limit_offcet(
        data_text, sentences_30_syllables,
        "sentences_30_syllables", "sentences_30_syllables",
        "This sentence has more than 30 syllables. Consider rewriting it to be shorter or splitting it into smaller sentences.",
        "Readability Issues")
    sentences_20_syllables = find_limit_offcet(
        data_text, sentences_20_syllables,
        "sentences_20_syllables", "sentences_20_syllables",
        "This sentence has more than 20 syllables. Consider rewriting it to be shorter or splitting it into smaller sentences.",
        "Readability Issues")
    words_12_letters, words_12_count, words_4_syllables, words_4_count = \
        words_sentence_syllables(divided_text)
    words_12_letters = find_limit_offcet(
        data_text, words_12_letters,
        "words_12_letters", "words_12_letters",
        "This word is more than 12 letters",
        "Readability Issues")
    words_4_syllables = find_limit_offcet(
        data_text, words_4_syllables,
        "words_4_syllables", "words_4_syllables",
        "This word is more than 4 syllables",
        "Readability Issues")
    text_readability_issues = dict(
        sentences30SyllablesCount=sentences_30_count,
        sentences20SyllablesCount=sentences_20_count,
        sentences30Syllables=[], sentences20Syllables=[],
        words4SyllablesCount=words_4_count,
        words12LettersCount=words_12_count,
        words4Syllables=[], words12Letters=[],
    )

    # Writing Style Issues
    passive_voises_return = find_passives(divided_text)
    passive_voises_return = find_limit_offcet(
        data_text, passive_voises_return,
        "passive_voises", "passive_voises",
        "Too much of using passive voises",
        "Writing Style Issues")
    adverbs_return = find_adverbs(pos_tags_tagger)
    adverbs_return = find_limit_offcet(
        data_text, adverbs_return,
        "adverbs", "adverbs",
        "Too much of using adverbs",
        "Writing Style Issues")
    text_style_issues = dict(
        passiveVoiceCount=len(passive_voises_return), passiveVoices=[],
        adverbsCount=len(adverbs_return), adverbs=[],
        clicheCount=0, cliches=[],
    )

    # Text Density Issues
    text_density_issues = dict(
        charactersPerWord=textstat.avg_character_per_word(data_text),
        syllablesPerWord=textstat.avg_syllables_per_word(data_text),
        wordsPerSentence=round(textstat.lexicon_count(data_text) / len(divided_text), 2),
        wordsPerParagraph=round(textstat.lexicon_count(data_text) / paragraphCount, 2),
        sentencesPerParagraph=round(len(divided_text) / paragraphCount, 2),
    )

    # Language Issues (counters not populated by this function)
    text_language_issues = dict(spellingIssuesCount=0, grammarIssueCount=0)

    # Flat list of every annotated span for inline highlighting
    matches_limit_offcet = (sentences_20_syllables + sentences_30_syllables +
                            words_4_syllables + words_12_letters +
                            passive_voises_return + adverbs_return)

    return dict(readabilityGradeLevels=readability_grade_levels,
                readabilityScores=readability_scores,
                textStatistics=text_statistics,
                timings=timings_statistics,
                textComposition=text_composition,
                textReadabilityIssues=text_readability_issues,
                textStyleIssues=text_style_issues,
                textDensityIssues=text_density_issues,
                textLanguageIssues=text_language_issues,
                matches=matches_limit_offcet)
"Dale-Chall Readability Score","Readability Consensus"]""" df = pd.DataFrame(columns=col_names) df["Sentences"] = sentences df["Word count"] = df["Sentences"].apply(lambda x: word_count(x)) df["Sentence Length"] = df["Sentences"].apply(lambda x: sentence_count(x)) df["Average Sentence length"] = df["Sentences"].apply( lambda x: avg_sentence_length(x)) df["Syllable Count"] = df["Sentences"].apply(lambda x: syllables_count(x)) df["Average syllables per words"] = df["Sentences"].apply( lambda x: avg_syllables_per_word(x)) df["Polysyllablic count"] = df["Sentences"].apply( lambda x: poly_syllable_count(x)) df["Lexicon Count"] = df["Sentences"].apply(lambda x: lexical_counts(x)) df["Flesch Reading Ease score"] = df["Sentences"].apply( lambda x: flesch_reading_ease(x)) df["Flesch-Kincaid Grade Level"] = df["Sentences"].apply( lambda x: textstat.flesch_kincaid_grade(x)) df["Fog Scale"] = df["Sentences"].apply(lambda x: gunning_fog(x)) df["SMOG Index"] = df["Sentences"].apply(lambda x: smog_index(x)) df["Automated Readability Index"] = df["Sentences"].apply( lambda x: textstat.automated_readability_index(x)) df["Coleman-Liau Index"] = df["Sentences"].apply( lambda x: textstat.coleman_liau_index(x)) df["Linsear Write Formula"] = df["Sentences"].apply( lambda x: textstat.linsear_write_formula(x)) df["Dale-Chall Readability Score"] = df["Sentences"].apply( lambda x: dale_chall_readability_score(x)) df["Readability Consensus"] = df["Sentences"].apply( lambda x: textstat.text_standard(x, float_output=False)) df.to_hdf('textstat_data.h5', key='textstat', mode='w')
def _readability_stats(text):
    """Return the 13 textstat readability metrics for *text*, in CSV column order."""
    return [
        textstat.syllable_count(text, lang='en_US'),
        textstat.lexicon_count(text, removepunct=True),
        textstat.sentence_count(text),
        textstat.flesch_reading_ease(text),
        textstat.flesch_kincaid_grade(text),
        textstat.gunning_fog(text),
        textstat.smog_index(text),
        textstat.automated_readability_index(text),
        textstat.coleman_liau_index(text),
        textstat.linsear_write_formula(text),
        textstat.dale_chall_readability_score(text),
        textstat.difficult_words(text),
        textstat.text_standard(text, float_output=False),
    ]


def _sentiment_values(scores):
    """Unpack a VADER polarity dict into the [neg, neu, pos, compound] CSV cells."""
    return [scores['neg'], scores['neu'], scores['pos'], scores['compound']]


def download(request):
    """Stream every tweet in the global ``tweetsList`` as a CSV attachment.

    Each row holds tweet/retweet metadata followed by textstat readability
    metrics and VADER sentiment scores for the original text, the comment
    text (blank cells when there is no comment) and the combined text,
    plus the Twitter query parameters that produced the tweet.

    :param request: Django request object (unused beyond being the view arg).
    :return: ``HttpResponse`` with ``Content-Disposition`` attachment headers.
    """
    global tweetsList
    response = HttpResponse(content_type='application/x-download')
    response['Content-Disposition'] = 'attachment; filename="tweets.csv"'

    # CSV header row; order must match the row assembly below.
    fieldnames = [
        'datetime', 'last updated',
        'original username', 'original screen name', 'original user location',
        'original user verified', 'retweet',
        'retweeter username', 'retweeter screen name', 'retweeter location',
        'retweeter verified', 'text', 'comment', 'hashtags', 'urls',
        '#retweets', '#favorites', '#favorites of retweet',
        'original syllable count', 'original lexicon count', 'original sentence count',
        'original flesch reading ease score', 'original flesch-kincaid grade level',
        'original fog scale', 'original smog index',
        'original automated readability index', 'original coleman-liau index',
        'original linsear write level', 'original dale-chall readability score',
        'original difficult words', 'original readability consensus',
        'original neg sentiment', 'original neu sentiment', 'original pos sentiment',
        'original overall sentiment',
        'comment syllable count', 'comment lexicon count', 'comment sentence count',
        'comment flesch reading ease score', 'comment flesch-kincaid grade level',
        'comment fog scale', 'comment smog index',
        'comment automated readability index', 'comment coleman-liau index',
        'comment linsear write level', 'comment dale-chall readability score',
        'comment difficult words', 'comment readability consensus',
        'comment neg sentiment', 'comment neu sentiment', 'comment pos sentiment',
        'comment overall sentiment',
        'combined syllable count', 'combined lexicon count', 'combined sentence count',
        'combined flesch reading ease score', 'combined flesch-kincaid grade level',
        'combined fog scale', 'combined smog index',
        'combined automated readability index', 'combined coleman-liau index',
        'combined linsear write level', 'combined dale-chall readability score',
        'combined difficult words', 'combined readability consensus',
        'combined neg sentiment', 'combined neu sentiment', 'combined pos sentiment',
        'combined overall sentiment',
        'twitter users query', 'twitter excluded users query',
        'twitter hashtags query', 'twitter keywords query',
        'twitter from date query', 'twitter to date query',
    ]
    writer = csv.writer(response, delimiter=',', quotechar='"',
                        quoting=csv.QUOTE_MINIMAL)
    writer.writerow(fieldnames)

    # The analyzer is stateless; build it once instead of once per tweet.
    sid_obj = SentimentIntensityAnalyzer()

    for tweet in tweetsList:
        # Comma-separated hashtags / urls attached to this tweet.
        hashtagString = ", ".join(
            log.hashtag.hashtagText
            for log in HashtagLog.objects.filter(tweet__id=tweet.id))
        urlString = ", ".join(
            log.url.urlText
            for log in UrlLog.objects.filter(tweet__id=tweet.id))

        originalVerifiedString = "yes" if tweet.originalUser.isVerified else "no"

        # Retweeter fields stay None when this is not a retweet.
        newUsername = newScreenName = newLocation = newVerifiedString = None
        if tweet.newUser:
            newVerifiedString = "yes" if tweet.newUser.isVerified else "no"
            newUsername = tweet.newUser.username
            newScreenName = tweet.newUser.screenName
            newLocation = tweet.newUser.location

        isRetweetString = "yes" if tweet.isRetweet else "no"

        # Sentiment for the original text and for original+comment combined.
        sentiment_dict_original = sid_obj.polarity_scores(tweet.originalText)
        commentText = tweet.commentText if tweet.commentText else ""
        combinedText = tweet.originalText + commentText
        sentiment_dict_combined = sid_obj.polarity_scores(combinedText)

        # Comment readability: blank cells when the comment is absent.
        # (Guard is `is not None`, matching the original `!= None` intent:
        # an empty-string comment still gets real stats.)
        if tweet.commentText is not None:
            commentStats = _readability_stats(tweet.commentText)
        else:
            commentStats = [""] * 13

        # Comment sentiment: blank cells when the comment is absent or empty.
        if tweet.commentText:
            commentSentiment = _sentiment_values(
                sid_obj.polarity_scores(tweet.commentText))
        else:
            commentSentiment = [""] * 4

        writer.writerow(
            [tweet.createdAt, tweet.lastUpdated,
             tweet.originalUser.username, tweet.originalUser.screenName,
             tweet.originalUser.location, originalVerifiedString,
             isRetweetString, newUsername, newScreenName, newLocation,
             newVerifiedString, tweet.originalText, tweet.commentText,
             hashtagString, urlString,
             tweet.numRetweetsOriginal, tweet.numFavoritesOriginal,
             tweet.numFavoritesNew]
            + _readability_stats(tweet.originalText)
            + _sentiment_values(sentiment_dict_original)
            + commentStats
            + commentSentiment
            + _readability_stats(combinedText)
            + _sentiment_values(sentiment_dict_combined)
            + [tweet.twitterQueryUsers, tweet.twitterQueryNotUsers,
               tweet.twitterQueryHashtags, tweet.twitterQueryKeywords,
               tweet.twitterQueryFromDate, tweet.twitterQueryToDate])
    return response