Example #1
import spacy
from textstat.textstat import textstatistics, legacy_round  # older textstat releases


def c_score(self, text):
    # Segment the text into sentences with spaCy.
    # Newer spaCy releases name the English model 'en_core_web_sm'.
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(text)
    sentences = list(doc.sents)
    # Total token count across all sentences.
    words = sum(len(sentence) for sentence in sentences)
    num_sent = len(sentences)
    sent_len = float(words / num_sent)
    # Average syllables per word.
    sylls = textstatistics().syllable_count(text)
    syls_p_wd = legacy_round(float(sylls) / float(words), 1)
    # Flesch Reading Ease: 206.835 - 1.015*ASL - 84.6*ASW
    FRE = 206.835 - float(1.015 * sent_len) - float(84.6 * syls_p_wd)
    score = legacy_round(FRE, 2)
    return words, score
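Because c_score takes self, it was presumably written as a method; a minimal usage sketch, assuming a hypothetical wrapper class purely to bind it:

class Readability:
    c_score = c_score  # reuse the function above as a method

words, fre = Readability().c_score("The cat sat on the mat. It purred softly.")
print(words, fre)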
Example #2
import re


def coleman_liau_index(text):
    characters = len(re.sub("[^a-zA-Z]", "", text))
    sentences = sentence_count(text)
    words = word_count(text)
    # Coleman-Liau: CLI = 0.0588*L - 0.296*S - 15.8, where L and S are
    # letters and sentences per 100 words.
    L = characters / words * 100
    S = sentences / words * 100
    CLI = (0.0588 * L) - (0.296 * S) - 15.8
    return legacy_round(CLI, 2)
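This and several later snippets call word_count, sentence_count, and legacy_round without defining them. A minimal sketch of what such helpers could look like, using the same spaCy/textstat stack as Examples #1 and #11 (the model name and token filter are assumptions):

import math
import spacy

nlp = spacy.load('en_core_web_sm')

def word_count(text):
    # Non-punctuation, non-whitespace tokens.
    return sum(1 for tok in nlp(text) if not tok.is_punct and not tok.is_space)

def sentence_count(text):
    return len(list(nlp(text).sents))

def legacy_round(number, points=0):
    # Round half away from zero, matching textstat's legacy_round helper.
    p = 10 ** points
    return float(math.floor((number * p) + math.copysign(0.5, number))) / p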
Example #3
def dale_chall_readability_score(text):
    # Raw score = 0.1579*(percentage of difficult words)
    #           + 0.0496*(average sentence length)
    # If the percentage of difficult words is above 5, add 3.6365.
    words, sentences, difficult_words, _ = get_param(text)
    if words == 0 or sentences == 0:
        return 0

    # Calculate average sentence length
    average_sentence_length = float(words / sentences)
    # Number of words not termed as difficult words
    not_difficult_words = words - difficult_words
    # Percentage of words not on the difficult-word list
    per_not_difficult_words = float(not_difficult_words) / float(words) * 100
    # Percentage of difficult words
    per_diff_words = 100 - per_not_difficult_words

    raw_score = (0.1579 * per_diff_words) + (0.0496 * average_sentence_length)

    # If the percentage of difficult words is greater than 5%, then
    # Adjusted score = Raw score + 3.6365; otherwise Adjusted score = Raw score.
    if per_diff_words > 5:
        raw_score += 3.6365

    return legacy_round(raw_score, 2)
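As a quick arithmetic check: for a hypothetical text of 100 words in 5 sentences with 10 difficult words, the percentage of difficult words is 10 and the average sentence length is 20, so the raw score is 0.1579 × 10 + 0.0496 × 20 = 2.571; since 10 > 5, the adjusted score is 2.571 + 3.6365 ≈ 6.21.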
Example #4
def dale_chall_readability_score(text):
    """
        Implements the Dale-Chall formula:
        Raw score = 0.1579*(PDW) + 0.0496*(ASL)
        Adjusted score = Raw score + 3.6365 when PDW > 5
        Here,
            PDW = Percentage of difficult words
            ASL = Average sentence length
    """
    words = word_count(text)
    if words == 0:
        return 0
    # Difficult words
    difficultWords = difficult_words(text)
    # Number of words not termed as difficult words
    count = words - difficultWords
    # Percentage of words not on the difficult-word list
    per = float(count) / float(words) * 100
    # Percentage of difficult words
    diff_words = 100 - per

    raw_score = (0.1579 * diff_words) + \
                (0.0496 * avg_sentence_length(text))

    # If the percentage of difficult words is greater than 5%, then
    # Adjusted score = Raw score + 3.6365; otherwise Adjusted score = Raw score.
    if diff_words > 5:
        raw_score += 3.6365

    return legacy_round(raw_score, 2)
Example #5
def avg_syllables_per_word(text):
    """
    Returns the average number of syllables per word in text
    """
    syllable = syllables_count(text)
    words = word_count(text)
    ASPW = float(syllable) / float(words)
    return legacy_round(ASPW, 1)
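The syllables_count helper used here is not defined in any of these examples; a plausible one-liner, reusing textstat's textstatistics class seen in Examples #1 and #11:

from textstat.textstat import textstatistics

def syllables_count(text):
    # Total syllables in the text, per textstat's estimator.
    return textstatistics().syllable_count(text)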
Example #6
import re


def automated_readability_index(text):
    # ARI counts letters and digits, so keep [a-zA-Z0-9] rather than
    # letters only.
    characters = len(re.sub("[^a-zA-Z0-9]", "", text))
    sentences = sentence_count(text)
    words = word_count(text)
    # ARI = 4.71*(characters/words) + 0.5*(words/sentences) - 21.43
    ARI = (4.71 * (characters / words)) + (0.5 * (words / sentences)) - 21.43
    return legacy_round(ARI, 2)
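To sanity-check the constants: with 4.5 characters per word and 15 words per sentence, ARI = 4.71 × 4.5 + 0.5 × 15 − 21.43 = 7.265 ≈ 7.27, i.e., roughly a seventh-grade reading level.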
Example #7
import unicodedata


def flesch_reading_ease(file_input):
    # Read the file and normalize Unicode before scoring.
    with open(file_input, "r", encoding="utf-8") as f:
        text = unicodedata.normalize('NFC', f.read())

    # Flesch Reading Ease: 206.835 - 1.015*ASL - 84.6*ASW
    FRE = 206.835 - float(1.015 * avg_sentence_length(text)) - float(
        84.6 * avg_syllables_per_word(text))
    return legacy_round(FRE, 2)
Example #8
def smog_index(text):
    # SMOG = 1.043 * sqrt(30 * polysyllables / sentences) + 3.1291
    sentences = sentence_count(text)
    if sentences >= 3:
        poly_syllab = poly_syllable_count(text)
        SMOG = (1.043 * (30 * (poly_syllab / sentences)) ** 0.5) + 3.1291
        return legacy_round(SMOG, 1)
    else:
        return 0
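poly_syllable_count is assumed but never shown; a minimal sketch consistent with the per-word loop in Example #16 below (a word counts as polysyllabic at three or more syllables):

from textstat.textstat import textstatistics

def poly_syllable_count(text):
    # Count words with three or more syllables.
    count = 0
    for word in text.split():
        if textstatistics().syllable_count(word) >= 3:
            count += 1
    return count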
Example #9
def smog_index(text):
    # SMOG grading ≈ 3 + sqrt(polysyllable count), where the polysyllable
    # count is the number of words of more than two syllables in a sample
    # of 30 sentences; the precise regression form is used below.
    _, sentence_count, _, poly_syllable_count = get_param(text)

    if sentence_count >= 3:
        SMOG = (1.043 * (30 * (poly_syllable_count / sentence_count)) ** 0.5) \
                + 3.1291
        return legacy_round(SMOG, 2)
    else:
        return 0
Example #10
def flesch_reading_ease(text):
    """
        Implements Flesch Formula:
        Reading Ease score = 206.835 - (1.015 × ASL) - (84.6 × ASW)
        Here,
          ASL = average sentence length (number of words
                divided by number of sentences)
          ASW = average word length in syllables (number of syllables
                divided by number of words)
    """
    FRE = 206.835 - float(1.015 * avg_sentence_length(text)) -\
          float(84.6 * avg_syllables_per_word(text))
    return legacy_round(FRE, 2)
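For interpreting the output: Flesch Reading Ease scores of 90–100 correspond to very easy text, 60–70 to plain English, and 0–30 to very difficult, college-level text. A quick usage sketch, assuming the helper definitions above:

print(flesch_reading_ease("The cat sat on the mat. The dog ran off."))
print(flesch_reading_ease("Notwithstanding methodological heterogeneity, the analysis remains persuasive."))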
Example #11
def flesch_reading_ease(text):
    # Reading Ease = 206.835 - (1.015 × average sentence length)
    #                        - (84.6 × average syllables per word)
    words_count, sentences_count, _, _ = get_param(text)
    # Calculate average sentence length
    avg_sentence_length = float(words_count / sentences_count)
    syllable_count = textstatistics().syllable_count(text)
    # Calculate average syllables per word
    avg_syllables_per_word = float(syllable_count) / float(words_count)

    FRE = 206.835 - float(1.015 * avg_sentence_length) \
          - float(84.6 * avg_syllables_per_word)

    return legacy_round(FRE, 2)
Example #12
def flesch_grade_level(text):
    """
        Implements the Flesch-Kincaid Grade Level formula:
        Grade level = (0.39 × ASL) + (11.8 × ASW) - 15.59
        Here,
          ASL = average sentence length (number of words
                divided by number of sentences)
          ASW = average word length in syllables (number of syllables
                divided by number of words)
    """
    FGL = float(0.39 * avg_sentence_length(text)) + float(
        11.8 * avg_syllables_per_word(text)) - 15.59
    return legacy_round(FGL, 2)
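The two Flesch variants move in opposite directions: reading ease falls as text gets harder, while grade level rises. A usage sketch, assuming Example #10's flesch_reading_ease and the helpers above:

for sample in ("The cat sat. The dog ran.",
               "Comprehensive institutional accountability necessitates transparent governance."):
    print(flesch_reading_ease(sample), flesch_grade_level(sample))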
Example #13
def smog_index(text):
    """
        Implements SMOG Formula / Grading
        SMOG grading = 3 + √(polysyllable count).
        Here, polysyllable count = number of words of more
        than two syllables in a sample of 30 sentences.
    """
    if sentence_count(text) >= 3:
        poly_syllab = poly_syllable_count(text)
        SMOG = (1.043 * (30 * (poly_syllab / sentence_count(text))) ** 0.5) + 3.1291
        return legacy_round(SMOG, 1)
    else:
        return 0
Example #14
def avg_syllables_per_word(text):
    # Estimate syllables by counting groups of consecutive vowels
    # across the whole lowercased text, then divide by the word count.
    lowered = text.lower()
    words = word_count(text)
    count = 0
    vowels = "aeiou"
    if lowered and lowered[0] in vowels:
        count += 1
    for index in range(1, len(lowered)):
        if lowered[index] in vowels and lowered[index - 1] not in vowels:
            count += 1
    # Never report zero syllables for non-empty text.
    if count == 0:
        count += 1
    return legacy_round(count / words, 2)
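This vowel-group heuristic is only an approximation: silent e's inflate the count ("make" scores two syllables) and adjacent vowel letters that span syllables deflate it ("idea" scores two), so expect it to drift from the textstat-based versions in Examples #5 and #15.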
Example #15
def avg_syllables_per_word(text):
    syllable = syllables_count(text)
    words = word_count(text)
    ASPW = float(syllable) / float(words)
    return legacy_round(ASPW, 1)
Example #16
    docReader = nltk.corpus.PlaintextCorpusReader('./', artist + '.txt')
    sentences = len(docReader.sents())

    # Calculate the total number of difficult words
    diff_words_count = textstat.difficult_words(raw_text)

    # Calculate readability-- Gunning Fog
    dif_words = (diff_words_count / ttl_words * 100)
    gf_read = 0.4 * (float(ttl_words / sentences) + dif_words)

    # Calculate readability-- SMOG
    poly_syl = 0
    for word in words:
        syl_count = textstatistics().syllable_count(word)
        if syl_count >= 3:
            poly_syl += 1
    SMOG = (1.043 * (30 * (poly_syl / sentences))**0.5) + 3.1291
    smog_read = legacy_round(SMOG, 1)

    # Calculate readability-- Coleman-Liau
    cl_read = textstat.coleman_liau_index(raw_text)

    df.loc[i] = (artist, 0, ttl_words, sentences, 0, len(set(words)),
                 round(100 - (len(lyrics_no_sw) * 100.0 / ttl_words),
                       2), diff_words_count, gf_read, smog_read, cl_read)
    i += 1

df['songs'] = [304, 224]
df['words_per_song'] = df['words'] / df['songs']
print(df)
df.to_csv("summary.csv", index=False)
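Two notes on this fragment: the fog expression is the standard Gunning fog form, 0.4 × (words/sentences + percentage of hard words), though the canonical index uses the percentage of complex (3+ syllable) words where this snippet substitutes textstat's difficult_words count. As an arithmetic check: 1000 words, 50 sentences, and 80 hard words give 0.4 × (20 + 8) = 11.2, about an 11th-grade level.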
Example #17
def avg_syllables_per_word(text):
    nsyllables = syllables_count(text)
    nwords = word_count(text)
    ASPW = float(nsyllables) / float(nwords)
    return legacy_round(ASPW, 2)
Example #18
def avg_sentence_length(text):
    nwords = word_count(text)
    nsentences = sentence_count(text)
    average_sentence_length = float(nwords / nsentences)
    return legacy_round(average_sentence_length, 2)
Example #19
def flesch_kincaid(text, avg_sen_len, avg_syl):
    # Computes the Flesch Reading Ease score from precomputed averages;
    # the text argument is unused here.
    flesch = 206.835 - float(1.015 * avg_sen_len) - \
             float(84.6 * avg_syl)
    return legacy_round(flesch, 2)
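Pulling the pieces together, a minimal end-to-end sketch feeding this function with Example #18's avg_sentence_length and Example #15's avg_syllables_per_word:

text = ("Readability formulas estimate how hard a text is to read. "
        "Shorter sentences and shorter words lower the difficulty.")
score = flesch_kincaid(text, avg_sentence_length(text), avg_syllables_per_word(text))
print(score)  # Flesch Reading Ease, despite the function's name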