def _get_reading_stats(no_code_text):
    """
    Run the textstat readability metrics over a piece of text.

    Each metric becomes one TextFeature in the returned list. For the
    metrics that list tolerated exceptions in the table below, such an
    exception is replaced by a placeholder string; for the others any
    exception propagates to the caller.

    :param no_code_text: String to analyse
    :return: list of TextFeature entries, one per metric, in a fixed order
    """
    group_by = 'Reading Level Analysis '
    index_error = (IndexError,)
    # One row per metric: label, textstat function name, exceptions to
    # tolerate, and the placeholder reported when one of them is raised.
    metrics = (
        # higher is better, scale 0 to 100
        ('Flesch Reading Ease', 'flesch_reading_ease', (), None),
        ('Flesch-Kincaid Grade Level', 'flesch_kincaid_grade', (), None),
        ('The Fog Scale (Gunning FOG formula)', 'gunning_fog',
         index_error, "Undetermined"),
        ('The SMOG Index', 'smog_index', index_error, "Undetermined"),
        ('Automated Readability Index', 'automated_readability_index',
         (), None),
        ('The Coleman-Liau Index', 'coleman_liau_index', (), None),
        ('Linsear Write Formula', 'linsear_write_formula',
         index_error, "Undetermined"),
        ('Dale Chall Readability Score', 'dale_chall_readability_score',
         index_error, "Undetermined"),
        ('Readability Consensus', 'readability_consensus',
         (TypeError, IndexError),
         "Undetermined; One of the tests above failed."),
    )

    results = []
    for label, function_name, tolerated, placeholder in metrics:
        try:
            feature = TextFeature(
                label, getattr(textstat, function_name)(no_code_text), group_by)
        except tolerated:  # an empty tuple here means nothing is caught
            feature = TextFeature(label, placeholder, group_by)
        results.append(feature)
    return results
def textstat_analysis(profile_text):
    """
    Compute a fixed set of textstat metrics for a piece of text.

    :param profile_text: String to analyse
    :return: tuple of the results of, in order: flesch_reading_ease,
        smog_index, flesch_kincaid_grade, coleman_liau_index,
        automated_readability_index, dale_chall_readability_score,
        difficult_words, linsear_write_formula, gunning_fog,
        readability_consensus, lexicon_count
    """
    # Evaluated in this order; the tuple positions follow the same order.
    metric_names = (
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'difficult_words',
        'linsear_write_formula',
        'gunning_fog',
        'readability_consensus',
        'lexicon_count',
    )
    return tuple(getattr(textstat, name)(profile_text) for name in metric_names)
Example #3
0
def calculate_readability_measures(id):
    """
    Compute readability measures for a page and store them on the document.

    Fetches the source of the ``page`` document ``id`` from the ``beek``
    index, runs the textstat metrics over its ``content`` field and writes
    the results back to the same document under the ``measures`` key.

    Computing the measures and updating the document is best-effort: any
    exception raised there is logged and suppressed. Exceptions raised while
    creating the client or fetching the document are not caught here.

    :param id: identifier of the page document to analyse and update
    :return: None
    """
    import logging

    es = elasticsearch.Elasticsearch()
    source = es.get_source(index='beek', doc_type='page', id=id)
    try:
        # Looked up inside the try so a missing 'content' key is handled
        # like any other failure of the computation.
        content = source['content']
        measures = {
            'flesch': textstat.flesch_reading_ease(content),
            'smog': textstat.smog_index(content),
            'flesch_kincaid': textstat.flesch_kincaid_grade(content),
            'coleman_liau': textstat.coleman_liau_index(content),
            'readability': textstat.automated_readability_index(content),
            'dale_chall': textstat.dale_chall_readability_score(content),
            'difficult_words': textstat.difficult_words(content),
            'linsear_write_formula': textstat.linsear_write_formula(content),
            'gunning_fog': textstat.gunning_fog(content),
            'consensus': textstat.readability_consensus(content),
        }

        es.update(index='beek',
                  doc_type='page',
                  id=id,
                  body={'doc': {
                      'measures': measures
                  }},
                  refresh=True)
    except Exception:
        # Stay best-effort, but leave a trace instead of failing silently.
        logging.getLogger(__name__).exception(
            'Could not update readability measures for page %r', id)
Example #4
0
def calculate_readability_measures(id):
    """
    Compute readability measures for a page and store them on the document.

    Fetches the source of the ``page`` document ``id`` from the ``beek``
    index, runs the textstat metrics over its ``content`` field and writes
    the results back to the same document under the ``measures`` key.

    Computing the measures and updating the document is best-effort: any
    exception raised there is logged and suppressed. Exceptions raised while
    creating the client or fetching the document are not caught here.

    :param id: identifier of the page document to analyse and update
    :return: None
    """
    import logging

    # Key stored under 'measures' -> name of the textstat function that
    # produces it, in the order the values are computed.
    metric_functions = (
        ('flesch', 'flesch_reading_ease'),
        ('smog', 'smog_index'),
        ('flesch_kincaid', 'flesch_kincaid_grade'),
        ('coleman_liau', 'coleman_liau_index'),
        ('readability', 'automated_readability_index'),
        ('dale_chall', 'dale_chall_readability_score'),
        ('difficult_words', 'difficult_words'),
        ('linsear_write_formula', 'linsear_write_formula'),
        ('gunning_fog', 'gunning_fog'),
        ('consensus', 'readability_consensus'),
    )

    es = elasticsearch.Elasticsearch()
    source = es.get_source(index='beek', doc_type='page', id=id)
    try:
        # Looked up inside the try so a missing 'content' key is handled
        # like any other failure of the computation.
        content = source['content']
        measures = {
            key: getattr(textstat, function_name)(content)
            for key, function_name in metric_functions
        }

        es.update(index='beek', doc_type='page', id=id,
                  body={'doc': {'measures': measures}}, refresh=True)
    except Exception:
        # Stay best-effort, but leave a trace instead of failing silently.
        logging.getLogger(__name__).exception(
            'Could not update readability measures for page %r', id)
    def get_readability(self, corpus, type='ari'):
        """
        Score ``corpus`` with the textstat metric selected by ``type``.

        Accepted ``type`` values: 'ari', 'flesch', 'smog', 'flesch_kincaid',
        'coleman', 'dale_chall', 'difficult_words', 'linsear', 'gunning_fog'
        and 'readability_consensus'. The historical misspellings
        'flesch_kinciad' and 'readability_conensus' are still accepted so
        existing callers keep working.

        :param corpus: text to analyse
        :param type: key naming the metric to compute (default 'ari')
        :return: the value returned by the selected textstat function, or
            None when ``type`` is not one of the accepted keys
        """
        # Accepted key -> name of the textstat function to call.
        metric_functions = {
            'ari': 'automated_readability_index',
            'flesch': 'flesch_reading_ease',
            'smog': 'smog_index',
            'flesch_kincaid': 'flesch_kincaid_grade',
            'flesch_kinciad': 'flesch_kincaid_grade',  # legacy misspelling
            'coleman': 'coleman_liau_index',
            'dale_chall': 'dale_chall_readability_score',
            'difficult_words': 'difficult_words',
            'linsear': 'linsear_write_formula',
            'gunning_fog': 'gunning_fog',
            'readability_consensus': 'readability_consensus',
            'readability_conensus': 'readability_consensus',  # legacy misspelling
        }

        # Non-string keys can never match (and may be unhashable), so treat
        # them like any other unknown key.
        if not isinstance(type, str):
            return None
        function_name = metric_functions.get(type)
        if function_name is None:
            return None
        return getattr(textstat, function_name)(corpus)
#!/bin/python

import sys, string, os
from textstat.textstat import textstat

# Command-line script: print one comma-separated row of textstat metrics for
# the text file named by the first command-line argument.
if len(sys.argv) < 2:
    sys.exit('usage: {0} INPUTFILE'.format(sys.argv[0]))

inputfile = sys.argv[1]

# Join the lines with a space: gluing them together directly would fuse the
# last word of each line with the first word of the next and distort the
# word, sentence and readability figures.
with open(inputfile) as myfile:
    test_data = " ".join(line.rstrip() for line in myfile)

# Column order of the output row.
metrics = [
    textstat.flesch_reading_ease(test_data),
    textstat.smog_index(test_data),
    textstat.flesch_kincaid_grade(test_data),
    textstat.coleman_liau_index(test_data),
    textstat.automated_readability_index(test_data),
    textstat.dale_chall_readability_score(test_data),
    textstat.difficult_words(test_data),
    textstat.linsear_write_formula(test_data),
    textstat.gunning_fog(test_data),
    textstat.readability_consensus(test_data),
    textstat.syllable_count(test_data),
    # NOTE(review): the second positional argument is presumably textstat's
    # "remove punctuation" flag - confirm against the installed version.
    textstat.lexicon_count(test_data, 1),
    textstat.sentence_count(test_data),
]

print(','.join(str(value) for value in metrics))
Example #7
0
def _get_reading_stats(no_code_text):
    """
    Collect reading-level metrics for a piece of text.

    Every metric is wrapped in a TextFeature. Metrics declared with
    tolerated exceptions report a placeholder string when one of those
    exceptions is raised; the remaining metrics let exceptions propagate.

    :param no_code_text: String to analyse
    :return: list of TextFeature entries, one per metric, in a fixed order
    """
    group_by = 'Reading Level Analysis '
    undetermined = "Undetermined"

    def feature(label, scorer, tolerated=(), fallback=undetermined):
        # Build one TextFeature; with the default empty ``tolerated`` tuple
        # nothing is caught and errors reach the caller.
        try:
            return TextFeature(label, scorer(no_code_text), group_by)
        except tolerated:
            return TextFeature(label, fallback, group_by)

    return [
        # higher is better, scale 0 to 100
        feature('Flesch Reading Ease', textstat.flesch_reading_ease),
        feature('Flesch-Kincaid Grade Level', textstat.flesch_kincaid_grade),
        feature('The Fog Scale (Gunning FOG formula)', textstat.gunning_fog,
                tolerated=IndexError),
        feature('The SMOG Index', textstat.smog_index, tolerated=IndexError),
        feature('Automated Readability Index',
                textstat.automated_readability_index),
        feature('The Coleman-Liau Index', textstat.coleman_liau_index),
        feature('Linsear Write Formula', textstat.linsear_write_formula,
                tolerated=IndexError),
        feature('Dale Chall Readability Score',
                textstat.dale_chall_readability_score,
                tolerated=IndexError),
        feature('Readability Consensus', textstat.readability_consensus,
                tolerated=(TypeError, IndexError),
                fallback="Undetermined; One of the tests above failed."),
    ]
Example #8
0
#ui=[]
# Part-of-speech profile of each essay: raw tag counts followed by the
# relative frequency of each tag.
print("grammer for the essay's")
# Iterate the column directly instead of indexing with range(len(df)), so
# the loop does not depend on the frame carrying a default 0..n-1 index.
for essay in df.essay:
    tagged = nltk.pos_tag(nltk.word_tokenize(essay.lower()))
    counts = Counter(tag for _, tag in tagged)
    print(counts)
    total = sum(counts.values())
    print({tag: float(count) / total for tag, count in counts.items()})
    print("")

# Syllable count and consensus reading level of each essay.
print("readability/complexity")
for essay in df.essay:
    print(textstat.syllable_count(essay))
    print(textstat.readability_consensus(essay))
    print("")

#Example
print("normalizing values")
# Interpolator built from the points (1, 1) and (512, 10).
ranger = interp1d([1, 512], [1, 10])
print(ranger(256))