Beispiel #1
0
def run_FKGL(output_dir):
    """Read the text file at *output_dir*, normalize each line to
    lowercase with surrounding whitespace removed, and return the
    Flesch-Kincaid grade level of the joined document."""
    with open(output_dir) as fh:
        normalized = [line.lower().strip() for line in fh]

    # Join the normalized lines into one space-separated document.
    document = " ".join(normalized)
    return Readability(document).FleschKincaidGradeLevel()
Beispiel #2
0
def show_stat(text):
    """Print every readability metric of *text* to stdout.

    Uses Python 2 print-statement syntax. Metrics shown: ARI, Flesch
    Reading Ease, Flesch-Kincaid grade level, Gunning Fog, SMOG,
    Coleman-Liau, LIX and RIX, each prefixed with its name.
    """
    rd = Readability(text)
    print 'Test text:'
    print '"%s"\n' % text
    print 'ARI: ', rd.ARI()
    print 'FleschReadingEase: ', rd.FleschReadingEase()
    print 'FleschKincaidGradeLevel: ', rd.FleschKincaidGradeLevel()
    print 'GunningFogIndex: ', rd.GunningFogIndex()
    print 'SMOGIndex: ', rd.SMOGIndex()
    print 'ColemanLiauIndex: ', rd.ColemanLiauIndex()
    print 'LIX: ', rd.LIX()
    print 'RIX: ', rd.RIX()
Beispiel #3
0
def get_read_stats(text):
    """Return a dict mapping metric names to readability scores of *text*."""
    rd = Readability(text)
    # Table of output key -> bound Readability method producing the score.
    # Tuple order matches the original dict insertion order.
    metric_table = (
        ('ari', rd.ARI),
        ('flesch_reading_ease', rd.FleschReadingEase),
        ('flesch_kincaid_grade_level', rd.FleschKincaidGradeLevel),
        ('gunning_fog_index', rd.GunningFogIndex),
        ('smog_index', rd.SMOGIndex),
        ('coleman_liau_index', rd.ColemanLiauIndex),
        ('lix', rd.LIX),
        ('rix', rd.RIX),
    )
    return {name: compute() for name, compute in metric_table}
Beispiel #4
0
def readability(id):
    """Fetch the document *id* and return a dict of its readability scores.

    NOTE(review): the parameter name shadows the builtin ``id()``; it is
    kept unchanged because it is part of the caller-visible interface.
    """
    text = getDocContent(id)
    rd = Readability(text)

    # Key order matches the original dict insertion order.
    scores = {}
    for key, metric in (
        ("ARI", rd.ARI),
        ("FleschReadingEase", rd.FleschReadingEase),
        ("FleschKincaidGradeLevel", rd.FleschKincaidGradeLevel),
        ("RIX", rd.RIX),
        ("GunningFogIndex", rd.GunningFogIndex),
        ("SMOGIndex", rd.SMOGIndex),
        ("ColemanLiauIndex", rd.ColemanLiauIndex),
        ("LIX", rd.LIX),
    ):
        scores[key] = metric()

    return scores
def getReadability():
    """Compute readability metrics for every author document found in the
    module-level ``directory`` and write them to ``PANreadibility.csv``.

    ``.xml`` files are read via ``gettext`` and treated as author texts
    (the author id is the filename without its extension); any other file
    is parsed as the truth table, one ``id:::gender:::age...`` record per
    line. CSV columns: ID, Gender, Age, ARI, FleschReadingEase,
    FleschKincaidGradeLevel, GunningFogIndex, SMOGIndex,
    ColemanLiauIndex, LIX, RIX.
    """
    texts = []
    authors = []
    truth = {}

    for fname in os.listdir(directory):
        if fname.endswith(".xml"):
            # NOTE(review): the original called te.encode('ascii','ignore')
            # here and discarded the result (a no-op); the encode that
            # actually matters happens below when Readability is built.
            texts.append(gettext(fname))
            authors.append(fname[:-4])  # author id = filename minus ".xml"
        else:
            # Truth file: each line is "author:::field:::field...".
            # Context manager replaces the original manual close().
            with open(directory + "/" + fname, 'r') as truth_fh:
                for record in truth_fh.read().split('\n')[:-1]:
                    parts = record.split(':::')
                    truth[parts[0]] = parts[1:]

    # Context manager guarantees the CSV is closed even on error
    # (the original leaked the handle on any exception).
    with open('PANreadibility.csv', 'w') as out:
        out.write(
            'ID,Gender,Age,ARI,FleschReadingEase,FleschKincaidGradeLevel,GunningFogIndex,SMOGIndex,ColemanLiauIndex,LIX,RIX\n'
        )
        for author, text in zip(authors, texts):
            rd = Readability(text.encode('ascii', 'ignore'))
            row = [
                author,
                truth[author][0],
                truth[author][1],
                str(rd.ARI()),
                str(rd.FleschReadingEase()),
                str(rd.FleschKincaidGradeLevel()),
                str(rd.GunningFogIndex()),
                str(rd.SMOGIndex()),
                str(rd.ColemanLiauIndex()),
                str(rd.LIX()),
                str(rd.RIX()),
            ]
            out.write(','.join(row) + '\n')
Beispiel #6
0
 def readability(self, text):
     """Return a (Flesch-Kincaid grade level, SMOG index) pair for *text*."""
     scores = Readability(text)
     return scores.FleschKincaidGradeLevel(), scores.SMOGIndex()
            # NOTE(review): this fragment is the interior of a loop whose
            # header lies outside this chunk; `tokens`, `raw`, `out_file`,
            # `pos_tags` and `func_words` come from the enclosing scope.
            # Build an NLTK Text wrapper and a token frequency distribution.
            text = nltk.Text(tokens)
            fdist = nltk.FreqDist(text)

            # Type-token ratio: distinct tokens / total tokens.
            vocab_richness = len(set(tokens)) / len(tokens)
            out_file.write(str(vocab_richness) + '\n')

            # Average occurrences per word type (fdist.N() is the total
            # token count, len(fdist) the number of distinct types) --
            # despite the variable name, this is not a word *length*.
            avg_word_len = fdist.N() / len(fdist)
            out_file.write(str(avg_word_len) + '\n')

            # Flesch-Kincaid grade level of the raw (untokenized) text.
            rd = Readability(raw)
            out_file.write(str(rd.FleschKincaidGradeLevel()) + '\n')

            # Frequency distribution of part-of-speech tags.
            tagged_text = nltk.pos_tag(text)
            tag_fd = nltk.FreqDist(tag for (word, tag) in tagged_text)

            for tag in pos_tags:
                out_file.write(str(tag_fd[tag]) + '\n')

            # Frequency of each tracked function word (English stopwords).
            stopwords = nltk.corpus.stopwords.words('english')
            txt_stopwords = [w for w in tokens if w in stopwords]
            functionWrd_freq = nltk.FreqDist(txt_stopwords)

            for func_word in func_words:
                out_file.write(str(functionWrd_freq[func_word]) + '\n')
# encoding: utf-8
# -*- coding: utf-8 -*
import sys
reload(sys)
sys.setdefaultencoding('utf8')

from readability import Readability

#file = open("C:\\Users\\Administrator\\Desktop\\myfolder\\sea-and-adventures\\the-old-man-and-the-sea.txt")
file = open(
    "C:\\Users\\Administrator\\Desktop\\myfolder\\corpora\\An-Inquiry-into-the-Nature-and-Causes-of-the-Wealth-of-Nations.txt"
)
text = file.read()
rd = Readability(text)
print 'ARI: ', rd.ARI()
print 'FleschReadingEase: ', rd.FleschReadingEase()
print 'FleschKincaidGradeLevel: ', rd.FleschKincaidGradeLevel()
print 'GunningFogIndex: ', rd.GunningFogIndex()
print 'SMOGIndex: ', rd.SMOGIndex()
print 'ColemanLiauIndex: ', rd.ColemanLiauIndex()
print 'LIX: ', rd.LIX()
print 'RIX: ', rd.RIX()
Beispiel #9
0
#!/usr/bin/python

from readability import Readability
import sys

if __name__ == '__main__':
    # CLI: python script.py <textfile> -> prints the Flesch-Kincaid
    # grade level of the file's contents.
    if len(sys.argv) < 2:
        # Clean usage message instead of an IndexError traceback.
        sys.exit('usage: %s <textfile>' % sys.argv[0])
    infile = sys.argv[1]
    # Context manager fixes the leaked file handle in the original.
    with open(infile) as f:
        text = f.read()
    rd = Readability(text)
    print(rd.FleschKincaidGradeLevel())
Beispiel #10
0
def get_text_flesch_grade_score(inp_text):
    """Return the Flesch-Kincaid grade level of *inp_text*,
    with surrounding whitespace stripped before scoring."""
    cleaned = inp_text.strip()
    scorer = Readability(cleaned)
    return scorer.FleschKincaidGradeLevel()