def c_score(self, text):
    """Return (word_count, Flesch Reading Ease score) for *text*.

    Tokenization and sentence splitting come from spaCy; syllables from
    textstatistics(). Intermediate values go through legacy_round to match
    the other readability helpers in this file.
    """
    # Loading a spaCy model is expensive; cache it on the instance so
    # repeated calls do not reload it.  NOTE(review): the 'en' shortcut
    # is deprecated/removed in spaCy >= 3 — confirm installed version.
    nlp = getattr(self, "_c_score_nlp", None)
    if nlp is None:
        nlp = spacy.load('en')
        self._c_score_nlp = nlp
    doc = nlp(text)
    sentences = list(doc.sents)
    words = sum(len(sentence) for sentence in sentences)
    # Guard degenerate input: the original divided by zero here.
    if not sentences or words == 0:
        return words, 0.0
    sent_len = float(words / len(sentences))
    sylls = textstatistics().syllable_count(text)
    syls_p_wd = legacy_round(float(sylls) / float(words), 1)
    # Flesch Reading Ease = 206.835 - 1.015*ASL - 84.6*ASW
    FRE = 206.835 - float(1.015 * sent_len) - float(84.6 * syls_p_wd)
    return words, legacy_round(FRE, 2)
def coleman_liau_index(text):
    """Coleman-Liau readability index of *text*, rounded to 2 decimals."""
    n_words = word_count(text)
    n_sents = sentence_count(text)
    # Only alphabetic characters count toward word length.
    n_letters = len(re.sub("[^a-zA-Z]", "", text))
    score = float(5.89 * (n_letters / n_words)) - float(0.3 * (n_sents / n_words)) - 15.8
    return legacy_round(score, 2)
def dale_chall_readability_score(text):
    """Dale-Chall readability score of *text*.

    Raw score = 0.1579 * (% difficult words) + 0.0496 * (avg sentence length),
    plus a 3.6365 adjustment when difficult words exceed 5%.
    Returns 0.0 for empty input instead of raising.
    """
    words, sentences, difficult_words, _ = get_param(text)
    # Guard degenerate input: the original raised ZeroDivisionError for
    # zero sentences and UnboundLocalError (raw_score) for zero words.
    if words <= 0 or sentences <= 0:
        return 0.0
    average_sentence_length = float(words / sentences)
    # Percentage of words that are on the difficult-word list.
    per_not_difficult_words = float(words - difficult_words) / float(words) * 100
    per_diff_words = 100 - per_not_difficult_words
    raw_score = (0.1579 * per_diff_words) + (0.0496 * average_sentence_length)
    # If Percentage of Difficult Words is greater than 5%, then
    # Adjusted Score = Raw Score + 3.6365; otherwise it is the raw score.
    if per_diff_words > 5:
        raw_score += 3.6365
    return legacy_round(raw_score, 2)
def dale_chall_readability_score(text):
    """Implements the Dale-Chall formula:

    Raw score = 0.1579 * PDW + 0.0496 * ASL (+ 3.6365 when PDW > 5%)
    Here,
        PDW = percentage of difficult words,
        ASL = average sentence length.
    Returns 0.0 for empty input instead of raising.
    """
    words = word_count(text)
    n_difficult = difficult_words(text)
    # Guard: the original left raw_score unbound when words == 0 and
    # raised UnboundLocalError at the return statement.
    if words <= 0:
        return 0.0
    # Percentage of words NOT on the difficult-word list, then its complement.
    per_easy = float(words - n_difficult) / float(words) * 100
    per_diff = 100 - per_easy
    raw_score = (0.1579 * per_diff) + (0.0496 * avg_sentence_length(text))
    # Adjustment applies only when more than 5% of words are difficult.
    if per_diff > 5:
        raw_score += 3.6365
    return legacy_round(raw_score, 2)
def avg_syllables_per_word(text):
    """Average number of syllables per word in *text*, rounded to 1 decimal."""
    ratio = float(syllables_count(text)) / float(word_count(text))
    return legacy_round(ratio, 1)
def automated_readability_index(text):
    """Automated Readability Index (ARI) of *text*, rounded to 2 decimals."""
    n_chars = len(re.sub("[^a-zA-Z]", "", text))
    n_words = word_count(text)
    n_sents = sentence_count(text)
    chars_per_word = n_chars / n_words
    words_per_sent = n_words / n_sents
    ari = float(4.71 * chars_per_word) + float(0.5 * words_per_sent) - 21.43
    return legacy_round(ari, 2)
def flesch_reading_ease(file_input):
    """Flesch Reading Ease of the NFC-normalized contents of *file_input*.

    NOTE(review): the trailing "+ 42" is not part of the standard Flesch
    formula — presumably a deliberate calibration offset; confirm intent.
    """
    with open(file_input, "r") as handle:
        raw = handle.read()
    normalized = unicodedata.normalize('NFC', raw)
    asl = avg_sentence_length(normalized)
    aspw = avg_syllables_per_word(normalized)
    score = 206.835 - float(1.015 * asl) - float(84.6 * aspw) + 42
    return legacy_round(score, 2)
def smog_index(text):
    """SMOG grade of *text*, rounded to 1 decimal.

    Returns 0 when the sample has fewer than 3 sentences (SMOG is not
    defined for such short samples).
    """
    # Hoisted: sentence_count(text) was previously evaluated twice.
    n_sentences = sentence_count(text)
    if n_sentences < 3:
        return 0
    poly_syllab = poly_syllable_count(text)
    SMOG = (1.043 * (30 * (poly_syllab / n_sentences)) ** 0.5) + 3.1291
    return legacy_round(SMOG, 1)
def smog_index(text):
    """SMOG grading of *text*, rounded to 2 decimals.

    SMOG grading = 3 + sqrt(polysyllable count), where the polysyllable
    count is the number of words of more than two syllables in a sample
    of 30 sentences. Returns 0 for samples with fewer than 3 sentences.
    """
    _, n_sents, _, n_poly = get_param(text)
    if n_sents < 3:
        return 0
    grade = (1.043 * (30 * (n_poly / n_sents)) ** 0.5) + 3.1291
    return legacy_round(grade, 2)
def flesch_reading_ease(text):
    """Flesch Reading Ease of *text*, rounded to 2 decimals.

    Score = 206.835 - 1.015 * ASL - 84.6 * ASW, where ASL is the average
    sentence length (words / sentences) and ASW the average word length
    in syllables (syllables / words).
    """
    asl = avg_sentence_length(text)
    asw = avg_syllables_per_word(text)
    score = 206.835 - float(1.015 * asl) - float(84.6 * asw)
    return legacy_round(score, 2)
def flesch_reading_ease(text):
    """Reading Ease = 206.835 - (1.015 * avg sentence length)
    - (84.6 * avg syllables per word), rounded to 2 decimals."""
    n_words, n_sents, _, _ = get_param(text)
    # Average sentence length and average syllables per word.
    asl = float(n_words / n_sents)
    asw = float(textstatistics().syllable_count(text)) / float(n_words)
    score = 206.835 - float(1.015 * asl) - float(84.6 * asw)
    return legacy_round(score, 2)
def flesch_grade_level(text):
    """
    Implements the Flesch-Kincaid Grade Level formula:
        Grade = (0.39 x ASL) + (11.8 x ASW) - 15.59
    Here,
        ASL = average sentence length (number of words divided by number of sentences)
        ASW = average word length in syllables (number of syllables divided by number of words)
    (The previous docstring described the Reading Ease formula, but the
    coefficients below are the grade-level ones.)
    """
    FGL = float(0.39 * avg_sentence_length(text)) + float(
        11.8 * avg_syllables_per_word(text)) - 15.59
    return legacy_round(FGL, 2)
def smog_index(text):
    """
    Implements SMOG Formula / Grading:
        SMOG grading = 3 + sqrt(polysyllable count)
    Here, polysyllable count = number of words of more than two
    syllables in a sample of 30 sentences.
    Returns 0 when the text has fewer than 3 sentences.
    """
    # Fixed the mojibake "?polysyllable" (lost square-root sign) in the
    # docstring and hoisted the duplicated sentence_count() call.
    n_sentences = sentence_count(text)
    if n_sentences < 3:
        return 0
    poly_syllab = poly_syllable_count(text)
    SMOG = (1.043 * (30 * (poly_syllab / n_sentences)) ** 0.5) + 3.1291
    return legacy_round(SMOG, 1)
def avg_syllables_per_word(text):
    """Approximate average syllables per word in *text*, rounded to 2.

    Syllables are estimated as the number of maximal vowel ("aeiou") runs
    across the whole lowercased text, floored at 1. NOTE: crude heuristic —
    'y' and silent-'e' rules are not handled.
    Returns 0.0 for empty input instead of raising.
    """
    lowered = text.lower()  # the original misleadingly named this 'word'
    words = word_count(text)
    # Guard: indexing lowered[0] used to raise IndexError on empty text,
    # and words == 0 produced ZeroDivisionError.
    if not lowered or words == 0:
        return 0.0
    # A maximal vowel run == a vowel whose predecessor is not a vowel
    # (plus a leading vowel) — identical to the original char-by-char scan.
    syllables = len(re.findall("[aeiou]+", lowered))
    if syllables == 0:
        syllables = 1
    return legacy_round(syllables / words, 2)
def avg_syllables_per_word(text):
    """Average syllables per word in *text*, rounded to one decimal place."""
    total_syllables = syllables_count(text)
    total_words = word_count(text)
    return legacy_round(float(total_syllables) / float(total_words), 1)
# Per-artist summary row: build a corpus reader over '<artist>.txt' and
# compute readability metrics. NOTE(review): this fragment references names
# (artist, raw_text, ttl_words, words, df, i, lyrics_no_sw) defined outside
# this excerpt — presumably inside a loop over artists; confirm context.
docReader = nltk.corpus.PlaintextCorpusReader('./', artist + '.txt')
sentences = len(docReader.sents())
# Total number of difficult words in the raw text (per textstat's word list).
diff_words_count = textstat.difficult_words(raw_text)
# Calculate readability-- Gunning Fog:
# 0.4 * (words-per-sentence + percentage of difficult words)
dif_words = (diff_words_count / ttl_words * 100)
gf_read = 0.4 * (float(ttl_words / sentences) + dif_words)
# Calculate readability-- SMOG: count polysyllabic words (>= 3 syllables),
# then apply the SMOG formula.
poly_syl = 0
for word in words:
    syl_count = textstatistics().syllable_count(word)
    if syl_count >= 3:
        poly_syl += 1
SMOG = (1.043 * (30 * (poly_syl / sentences))**0.5) + 3.1291
smog_read = legacy_round(SMOG, 1)
# Calculate readability-- Coleman-Liau (the original comment said
# "Linsear Write", but the call below is coleman_liau_index).
cl_read = textstat.coleman_liau_index(raw_text)
# Append the summary row; the two 0 entries are presumably placeholder
# columns — verify against the DataFrame's column definitions.
df.loc[i] = (artist, 0, ttl_words, sentences, 0, len(set(words)),
             round(100 - (len(lyrics_no_sw) * 100.0 / ttl_words), 2),
             diff_words_count, gf_read, smog_read, cl_read)
i += 1
# Hard-coded song counts per artist -- TODO confirm ordering matches df rows.
df['songs'] = [304, 224]
df['words_per_song'] = df['words'] / df['songs']
print(df)
df.to_csv("summary.csv", index=False)
def avg_syllables_per_word(text):
    """Average syllable count per word in *text*, rounded to 2 decimals."""
    per_word = float(syllables_count(text)) / float(word_count(text))
    return legacy_round(per_word, 2)
def avg_sentence_length(text):
    """Average number of words per sentence in *text*, rounded to 2 decimals."""
    words_per_sentence = float(word_count(text) / sentence_count(text))
    return legacy_round(words_per_sentence, 2)
def flesch_kincaid(text, avg_sen_len, avg_syl):
    """Flesch Reading Ease score from precomputed averages, rounded to 2.

    *text* is accepted for signature symmetry with the other scorers but
    is not consulted; the score uses *avg_sen_len* (ASL) and *avg_syl*
    (ASW) directly: 206.835 - 1.015*ASL - 84.6*ASW.
    """
    sentence_term = float(1.015 * avg_sen_len)
    syllable_term = float(84.6 * avg_syl)
    return legacy_round(206.835 - sentence_term - syllable_term, 2)