Python gunning_fog Examples, textstat.textstat.textstat.gunning_fog Python Examples

Example #1

0

Show file

File: app.py Project: smanjit/metrics

def compareContents():
	if request.method == "POST":
	    line = request.form['poem']
	    poem1 = request.form['poem1']
		#---------Metrics comparison logic goes here. keep them in session attributes-----------------------#

	    session['line'] = line	    
        #print("i am in row : ",row)
        #print "Tagline :", line
	    #print("no of words= ",len(line.split()))
	    #line1 = line.lstrip('0123456789.- ,')
	    #print "flesch_reading_ease = ",textstat.flesch_reading_ease(line)
	    fre = textstat.flesch_reading_ease(line)
	    session['fre'] = fre
	    #print "smog_index = ",textstat.smog_index(line)
	    smog = textstat.smog_index(line)
	    session['smog'] = smog
	    #print "flesch_kincaid_grade = ",textstat.flesch_kincaid_grade(line)
	    fkg = textstat.flesch_kincaid_grade(line)
	    session['fkg'] = fkg
	    #print "dale_chall_readability_score = ", textstat.dale_chall_readability_score(line)
	    dcr = textstat.dale_chall_readability_score(line)
	    session['dcr'] = dcr
	    #print "gunning_fog = ",textstat.gunning_fog(line)
	    gf = textstat.gunning_fog(line)
	    session['gf'] = gf
	    metrics = True
	    return render_template('compareContents.html',metrics=metrics, line=line, fre=fre, smog=smog, fkg=fkg, dcr=dcr,gf=gf)
	return render_template('compareContents.html')

Example #2

0

Show file

def f():
    print("hello")
    book = xlwt.Workbook()
    worksheet = book.add_sheet('ReadabilityScore')
    worksheet.write(0, 0, "Gen_sent")
    worksheet.write(0, 1, "flesch_reading_ease")
    worksheet.write(0, 2, "flesch_kincaid_grade")
    worksheet.write(0, 3, "dale_chall_readability_score")
    worksheet.write(0, 4, "gunning_fog")

    f = open('abc.txt')  #, encoding='utf-8')
    row = 1
    for line in iter(f):
        #print("i am in row : ",row)
        #print "Tagline :", line
        worksheet.write(row, 0, line)
        #print("no of words= ",len(line.split()))
        #line1 = line.lstrip('0123456789.- ,')
        #print "flesch_reading_ease = ",textstat.flesch_reading_ease(line)
        fre = textstat.flesch_reading_ease(line)
        worksheet.write(row, 1, fre)
        #print "smog_index = ",textstat.smog_index(line)
        smog = textstat.smog_index(line)
        #print "flesch_kincaid_grade = ",textstat.flesch_kincaid_grade(line)
        fkg = textstat.flesch_kincaid_grade(line)
        worksheet.write(row, 2, fkg)
        #print "dale_chall_readability_score = ", textstat.dale_chall_readability_score(line)
        dcr = textstat.dale_chall_readability_score(line)
        worksheet.write(row, 3, dcr)
        #print "gunning_fog = ",textstat.gunning_fog(line)
        gf = textstat.gunning_fog(line)
        worksheet.write(row, 4, gf)
        row += 1
    book.save('Readability_Scores.xls')

Example #3

0

Show file

File: hs_featureextraction.py Project: gisemba/bernie_vs_trump

def other_features(tweet):
    ##SENTIMENT
    sentiment = VS(tweet)
    ##READABILITY
    #See https://pypi.python.org/pypi/textstat/
    flesch = round(textstat.flesch_reading_ease(tweet), 3)
    flesch_kincaid = round(textstat.flesch_kincaid_grade(tweet), 3)
    gunning_fog = round(textstat.gunning_fog(tweet), 3)
    ##TEXT-BASED
    length = len(tweet)
    num_terms = len(tweet.split())
    ##TWITTER SPECIFIC TEXT FEATURES
    hashtag_count = tweet.count("#")
    mention_count = tweet.count("@")
    url_count = tweet.count("http")
    retweet = 0
    if tweet.lower().startswith("rt") is True:
        retweet = 1
    #Checking if RT is in the tweet
    words = tweet.lower().split()
    if "rt" in words or "#rt" in words:
        retweet = 1
    features = [
        sentiment['compound'], flesch, flesch_kincaid, gunning_fog, length,
        num_terms, hashtag_count, mention_count, url_count, retweet
    ]
    return features

Example #4

0

Show file

File: download_book.py Project: RealTimeWeb/datasets

def get_special_metrics(text):
    blob = TextBlob(text)
    main = {
        'statistics': {
            'syllables': textstat.syllable_count(text),
            'words': textstat.lexicon_count(text),
            'characters': textstat.char_count(text),
            'polysyllables': textstat.polysyllabcount(text),
            'average letter per word': textstat.avg_letter_per_word(text),
            'average sentence length': textstat.avg_sentence_length(text),
            'average sentence per word': textstat.avg_sentence_per_word(text),
            'sentences': textstat.sentence_count(text)
        },
        'difficulty': {
            'flesch reading ease': textstat.flesch_reading_ease(text),
            'smog index': textstat.smog_index(text),
            'flesch kincaid grade': textstat.flesch_kincaid_grade(text),
            'coleman liau index': textstat.coleman_liau_index(text),
            #'automated readability index': textstat.automated_readability_index(text),
            #'dale chall readability score': textstat.dale_chall_readability_score(text),
            #'difficult words': textstat.difficult_words(text),
            #'linsear write formula': textstat.linsear_write_formula(text),
            'gunning fog': textstat.gunning_fog(text)
        },
        'sentiments': {
            'polarity': blob.sentiment.polarity,
            'subjectivity': blob.sentiment.subjectivity
        }
    }

    return main

Example #5

0

Show file

File: ReadSpeed.py Project: d4444x/readr

def text_analytics(text):
    if textstat.sentence_count(text) != 0:
        lexicon = textstat.lexicon_count(text) #word count
        sent = textstat.sentence_count(text) #sentence count
        syll = textstat.syllable_count(text) #syllable count
        flesch = textstat.flesch_reading_ease(text) #flesch score
        smog = textstat.smog_index(text) #SMOG index
        fog = textstat.gunning_fog(text) #FOG index
        dale = textstat.dale_chall_readability_score(text) #grade level
        ari = textstat.automated_readability_index(text) #grade level
        cl = textstat.coleman_liau_index(text) #grade level

        flesch1 = lexicon*flesch
        flesch2 = sent*flesch
        flesch3 = syll*flesch
        smog1 = lexicon*smog
        smog2 = sent*smog
        smog3 = syll*smog
        fog1 = lexicon*fog
        fog2 = sent*fog
        fog3 = syll*fog
        dale1 = lexicon*dale
        dale2 = sent*dale
        dale3=syll*dale
        ari1 = lexicon*ari
        ari2 = sent*ari
        ari3 = syll*ari
        cl1 = lexicon*cl
        cl2 = sent*cl
        cl3 = syll*cl
        x=[lexicon,sent,syll,flesch,smog,fog,dale,ari,cl,flesch1,flesch2,flesch3,smog1,                 smog2,smog3,fog1,fog2,fog3,dale1,dale2,dale3,ari1,ari2,ari3,cl1,cl2,cl3]
    return(x)

Example #6

0

Show file

File: getinfo.py Project: LiamCheng/Twitter_analysis

def readability(text, file):
    fog = textstat.gunning_fog(text)
    fres = textstat.flesch_reading_ease(text)
    fkgl = textstat.flesch_kincaid_grade(text)
    file.write(
        '\nGunning Fog Index: %d \nFlesch Reading Ease: %d \nFlesch-Kincaid Grade: %d'
        % (fog, fres, fkgl))

Example #7

0

Show file

def calculate_statistics(lyrics):
    """
    Calculates statistics based on the text_raw of the lyrics.
    :return: Annotated lyrics containing information about the songs
    """
    logging.info("Calculating Statistics")
    from textstat.textstat import textstat
    for idx, song in tqdm(enumerate(lyrics), total=len(lyrics)):
        try:
            song["num_syllables"] = textstat.syllable_count(song["text_raw"])
            song["num_words"] = textstat.lexicon_count(song["text_raw"])
            song["num_sentences"] = textstat.sentence_count(song["text_raw"])
            song["flesch_score"] = textstat.flesch_reading_ease(
                song["text_raw"])
            song["flesch_kincaid_level"] = textstat.flesch_kincaid_grade(
                song["text_raw"])
            song["fog_score"] = textstat.gunning_fog(song["text_raw"])
            song[
                "num_difficult_words"] = textstat.dale_chall_readability_score(
                    song["text_raw"])
        except Exception as e:
            logging.error(
                "Something bad happened in the current song ! Skipping it... \n{}"
                .format(song))
            logging.exception(e)
    return lyrics

Example #8

0

Show file

File: hs_featureextraction.py Project: georgeberry/bernie_vs_trump

def other_features(tweet):
    ##SENTIMENT
    sentiment = VS(tweet)
    ##READABILITY
    #See https://pypi.python.org/pypi/textstat/
    flesch = round(textstat.flesch_reading_ease(tweet),3)
    flesch_kincaid = round(textstat.flesch_kincaid_grade(tweet),3)
    gunning_fog = round(textstat.gunning_fog(tweet),3)
    ##TEXT-BASED
    length = len(tweet)
    num_terms = len(tweet.split())
    ##TWITTER SPECIFIC TEXT FEATURES
    hashtag_count = tweet.count("#")
    mention_count = tweet.count("@")
    url_count = tweet.count("http")
    retweet = 0
    if tweet.lower().startswith("rt") is True:
        retweet = 1
    #Checking if RT is in the tweet
    words = tweet.lower().split()
    if "rt" in words or "#rt" in words:
        retweet = 1
    features = [sentiment['compound'],flesch, flesch_kincaid,
                gunning_fog, length, num_terms,
                hashtag_count, mention_count,
                url_count, retweet]
    return features

Example #9

0

Show file

File: Menoetius.py Project: rhysmeister/Weasel

 def do_text_stats(self, text):
     ### Syllable Count
     syllable_count = textstat.syllable_count(text)
     ### Lexicon Count
     lexicon_count = textstat.lexicon_count(text, True)
     ### Sentence Count
     sentence_count = textstat.sentence_count(text)
     ### The Flesch Reading Ease formula
     try:
         flesch_reading_ease = textstat.flesch_reading_ease(text)
     except TypeError as e:
         flesch_reading_ease = None
     #* 90-100 : Very Easy
     #* 80-89 : Easy
     #* 70-79 : Fairly Easy
     #* 60-69 : Standard
     #* 50-59 : Fairly Difficult
     #* 30-49 : Difficult
     #* 0-29 : Very Confusing
     ### The The Flesch-Kincaid Grade Level
     try:
         flesch_kincaid_grade = textstat.flesch_kincaid_grade(text)
     except TypeError as e:
         flesch_kincaid_grade = None
     ## The Fog Scale (Gunning FOG Formula)
     gunning_fog = textstat.gunning_fog(text)
     ### The SMOG Index
     smog_index = textstat.smog_index(text)
     ### Automated Readability Index
     automated_readability_index = textstat.automated_readability_index(
         text)
     ### The Coleman-Liau Index
     try:
         coleman_liau_index = textstat.coleman_liau_index(text)
     except TypeError as e:
         coleman_liau_index = None
     ### Linsear Write Formula
     linsear_write_formula = textstat.linsear_write_formula(text)
     ### Dale-Chall Readability Score
     dale_chall_readability_score = textstat.dale_chall_readability_score(
         text)
     ### Readability Consensus based upon all the above tests
     try:
         text_standard = textstat.text_standard(text)
     except TypeError as e:
         text_standard = None
     return {
         "syllable_count": syllable_count,
         "lexicon_count": lexicon_count,
         "sentence_count": sentence_count,
         "flesch_reading_ease": flesch_reading_ease,
         "flesch_kincaid_grade": flesch_kincaid_grade,
         "gunning_fog": gunning_fog,
         "smog_index": smog_index,
         "automated_readability_index": automated_readability_index,
         "coleman_liau_index": coleman_liau_index,
         "linsear_write_formula": linsear_write_formula,
         "dale_chall_readability_score": dale_chall_readability_score,
         "text_standard": text_standard
     }

Example #10

0

Show file

File: readability.py Project: ghoulmann/essay_analysis

def readability(text):
    print("Readability\n=================================\n\n")
    print("Flesch Reading Ease\n________________________\n\n")
    print str(textstat.flesch_reading_ease(text)) + "\n"
    print("Smog Index\n________________________\n\n")
    print str(textstat.smog_index(text)) + "\n"
    print("Flesch Kincaid Grade\n________________________\n\n")
    print str(textstat.flesch_kincaid_grade(text)) + "\n"
    print("Coleman Liau Index\n________________________\n\n")
    print str(textstat.coleman_liau_index(text)) + "\n"
    print("ARI\n________________________\n\n")
    print str(textstat.automated_readability_index(text)) + "\n"
    print("Dale Chall\n________________________\n\n")
    print str(textstat.dale_chall_readability_score(text)) + "\n"
    print("Difficult Words\n________________________\n\n")
    print str(textstat.difficult_words(text)) + "\n"
    print("Linsear Write Formula\n________________________\n\n")
    print str(textstat.linsear_write_formula(text)) + "\n"
    print("Gunning Fog\n________________________\n\n")
    print str(textstat.gunning_fog(text)) + "\n"
    print "Compiled Score\n_____________________________\n\n"
    print str(textstat.text_standard(text)) + "\n"


    return len(adjectives)

Example #11

0

Show file

File: download_book.py Project: RealTimeWeb/datasets

def get_special_metrics(text):
    blob = TextBlob(text)
    main = {
        "statistics": {
            "syllables": textstat.syllable_count(text),
            "words": textstat.lexicon_count(text),
            "characters": textstat.char_count(text),
            "polysyllables": textstat.polysyllabcount(text),
            "average letter per word": textstat.avg_letter_per_word(text),
            "average sentence length": textstat.avg_sentence_length(text),
            "average sentence per word": textstat.avg_sentence_per_word(text),
            "sentences": textstat.sentence_count(text),
        },
        "difficulty": {
            "flesch reading ease": textstat.flesch_reading_ease(text),
            "smog index": textstat.smog_index(text),
            "flesch kincaid grade": textstat.flesch_kincaid_grade(text),
            "coleman liau index": textstat.coleman_liau_index(text),
            #'automated readability index': textstat.automated_readability_index(text),
            #'dale chall readability score': textstat.dale_chall_readability_score(text),
            #'difficult words': textstat.difficult_words(text),
            #'linsear write formula': textstat.linsear_write_formula(text),
            "gunning fog": textstat.gunning_fog(text),
        },
        "sentiments": {"polarity": blob.sentiment.polarity, "subjectivity": blob.sentiment.subjectivity},
    }

    return main

Example #12

0

Show file

File: utils.py Project: AWegnerGitHub/SE_Zephyr_VoteRequest_bot

def _get_reading_stats(no_code_text):
    """
    Returns reading level information
    :param no_code_text: String to analyse
    :return: list of details
    """
    group_by = 'Reading Level Analysis '
    results = []
    results.append(TextFeature('Flesch Reading Ease', textstat.flesch_reading_ease(no_code_text), group_by))        # higher is better, scale 0 to 100
    results.append(TextFeature('Flesch-Kincaid Grade Level', textstat.flesch_kincaid_grade(no_code_text), group_by))
    try:
        results.append(TextFeature('The Fog Scale (Gunning FOG formula)', textstat.gunning_fog(no_code_text), group_by))
    except IndexError:  # Not sure why, but this test throws this error sometimes
        results.append(TextFeature('The Fog Scale (Gunning FOG formula)', "Undetermined", group_by))
    try:
        results.append(TextFeature('The SMOG Index', textstat.smog_index(no_code_text), group_by))
    except IndexError:  # Not sure why, but this test throws this error sometimes
        results.append(TextFeature('The SMOG Index', "Undetermined", group_by))
    results.append(TextFeature('Automated Readability Index', textstat.automated_readability_index(no_code_text), group_by))
    results.append(TextFeature('The Coleman-Liau Index', textstat.coleman_liau_index(no_code_text), group_by))
    try:
        results.append(TextFeature('Linsear Write Formula', textstat.linsear_write_formula(no_code_text), group_by))
    except IndexError:
        results.append(TextFeature('Linsear Write Formula', "Undetermined", group_by))
    try:
        results.append(TextFeature('Dale Chall Readability Score', textstat.dale_chall_readability_score(no_code_text), group_by))
    except IndexError:  # Not sure why, but this test throws this error sometimes
        results.append(TextFeature('Dale Chall Readability Score', "Undetermined", group_by))

    try:
        results.append(TextFeature('Readability Consensus', textstat.readability_consensus(no_code_text), group_by))
    except (TypeError, IndexError):
        results.append(TextFeature('Readability Consensus', "Undetermined; One of the tests above failed.", group_by))
    return results

Example #13

0

Show file

File: fe_functions.py Project: pchankh/Kaggle-Toxic

def get_readability(df2):
    df = df2.copy()
    text_feats = df.select_dtypes(include=['object']).columns.values
    for i, col in enumerate(text_feats):
        df['flesch_reading_ease{}'.format(i)] = df[col].apply(
            lambda x: textstat.flesch_reading_ease(x))
        df['smog_index{}'.format(i)] = df[col].apply(
            lambda x: textstat.smog_index(x))
        df['flesch_kincaid_grade{}'.format(i)] = df[col].apply(
            lambda x: textstat.flesch_kincaid_grade(x))
        df['coleman_liau_index{}'.format(i)] = df[col].apply(
            lambda x: textstat.coleman_liau_index(x))
        df['automated_readability_index{}'.format(i)] = df[col].apply(
            lambda x: textstat.automated_readability_index(x))
        df['dale_chall_readability_score{}'.format(i)] = df[col].apply(
            lambda x: textstat.dale_chall_readability_score(x))
        df['difficult_words{}'.format(i)] = df[col].apply(
            lambda x: textstat.difficult_words(x))
        df['linsear_write_formula{}'.format(i)] = df[col].apply(
            lambda x: textstat.linsear_write_formula(x))
        df['gunning_fog{}'.format(i)] = df[col].apply(
            lambda x: textstat.gunning_fog(x))
        df['text_standard{}'.format(i)] = df[col].apply(
            lambda x: textstat.text_standard(x))
    return df

Example #14

0

Show file

File: feature_extraction.py Project: raamish/predicting-scope-of-stackoverflow-questions

def body_word_ari_gunning():
    """
	Body metrics

	1)generating a body word count column
	2)Automated reading index-
		4.71*(characters/words)+0.5*(words/sentences)-21.43
	3)Gunning Fox Index
	"""
    body_word_length = []
    body_sentences_length = []
    sentence_token = []
    valid_grammar_flag = []
    automated_reading_index = []
    gunning_fog = []

    tokenizer = RegexpTokenizer(r'\w+')
    for index, row in df.iterrows():
        """
		use BeautifulSoup to remove tags if required.
		However Body word count should contain the whole body 
		including any tags such as <p> and <code>
		"""
        body_only = re.sub('<code>[^>]+</code>', '', row['Body'])
        soup = BeautifulSoup(body_only, "lxml")
        word_tokens = tokenizer.tokenize(soup.text)
        word_count = len(word_tokens)
        body_word_length.append(word_count)
        tag_removed_text = soup.text
        tag_removed_text = tag_removed_text.replace("\n", "")
        character_count = len(tag_removed_text)
        valid_sentence = re.findall('[\w][.?!]\s[A-Z0-9]', tag_removed_text)
        sentence_token = sent_tokenize(tag_removed_text)
        sentences_count = len(sentence_token)
        body_sentences_length.append(sentences_count)

        if ((len(valid_sentence) + 1) == len(sentence_token)):
            valid_grammar_flag.append(1)
        else:
            valid_grammar_flag.append(0)
        if sentences_count != 0 and word_count != 0:
            ari = 4.71 * (character_count / word_count) + 0.5 * (
                word_count / sentences_count) - 21.43
            gfi = textstat.gunning_fog(tag_removed_text)
        else:
            ari = 14
            gfi = 17
        automated_reading_index.append(ari)
        gunning_fog.append(gfi)

    df['BodyWordCount'] = body_word_length
    df['BodySentencesCount'] = body_sentences_length
    df['ValidGrammar'] = valid_grammar_flag
    df['AutomatedReadingIndex'] = automated_reading_index
    df['GunningFogIndex'] = gunning_fog
    df.to_csv('combined.csv')

Example #15

0

Show file

File: clean_articles.py Project: jombooth/slantparse

def vecify(v):
    return [ts.flesch_reading_ease(v),
    # ts.smog_index(v),
    ts.flesch_kincaid_grade(v),
    ts.coleman_liau_index(v),
    ts.automated_readability_index(v),
    ts.dale_chall_readability_score(v),
    ts.difficult_words(v),
    ts.linsear_write_formula(v),
    ts.gunning_fog(v)]

Example #16

0

Show file

def add_reading_levels(df):
    for row,body in enumerate(df['body']):
        x = df['body'][row]
        df.loc[row,'flesch_kincaid']=textstat.flesch_kincaid_grade(x)
        df.loc[row,'fk_score']=textstat.flesch_reading_ease(x)
        #df.loc[row,'smog_index']=textstat.smog_index(x)
        df.loc[row,'gunning_fog']=textstat.gunning_fog(x)
        #df.loc[row,'difficult_words']=textstat.difficult_words(x)
        #df.loc[row,'text_standard']=textstat.text_standard(x)
    return df

Example #17

0

Show file

def all_trad_scores(text):
    fre = textstat.flesch_reading_ease(text)
    fkg = textstat.flesch_kincaid_grade(text)
    smog = textstat.smog_index(text)
    cole = textstat.coleman_liau_index(text)
    ari = textstat.automated_readability_index(text)
    dale = textstat.dale_chall_readability_score(text)
    linsear = textstat.linsear_write_formula(text)
    gunning = textstat.gunning_fog(text)

    return [fre, fkg, smog, cole, ari, dale, linsear, gunning]

Example #18

0

Show file

File: data_preprocess.py Project: tranj/okcupid-profile-analysis

def textstat_analysis(profile_text):
    fre = textstat.flesch_reading_ease(profile_text)
    smog = textstat.smog_index(profile_text)
    fkg = textstat.flesch_kincaid_grade(profile_text)
    coleman = textstat.coleman_liau_index(profile_text)
    ari = textstat.automated_readability_index(profile_text)
    dale = textstat.dale_chall_readability_score(profile_text)
    dw = textstat.difficult_words(profile_text)
    lwf = textstat.linsear_write_formula(profile_text)
    gf = textstat.gunning_fog(profile_text)
    rc = textstat.readability_consensus(profile_text)
    word_count = textstat.lexicon_count(profile_text)
    return (fre, smog, fkg, coleman, ari, dale, dw, lwf, gf, rc, word_count)

Example #19

0

Show file

File: textAnalyzer.py Project: winstonll/barima

 def reading_difficulty(self):
     diff_words = textstat.difficult_words(self.text) / self.nword
     flesch_kincaid = textstat.flesch_kincaid_grade(self.text)
     coleman_liau = textstat.coleman_liau_index(self.text)
     ari = textstat.automated_readability_index(self.text)
     dale_chall = textstat.dale_chall_readability_score(self.text)
     linsear = textstat.linsear_write_formula(self.text)
     gunning_fog = textstat.gunning_fog(self.text) - 6
     smog = textstat.smog_index(self.text)
     avg_grade = max(
         math.ceil((flesch_kincaid + coleman_liau + ari + dale_chall +
                    linsear + gunning_fog + smog) / 7), 12)
     return avg_grade, diff_words

Example #20

0

Show file

File: utility.py Project: yingtaiGithub/SEC-Files-Scraping

def get_readability(contents):
    readability = []
    readability.append(textstat.flesch_reading_ease(contents))
    readability.append(textstat.smog_index(contents))
    readability.append(textstat.flesch_kincaid_grade(contents))
    readability.append(textstat.automated_readability_index(contents))
    readability.append(textstat.dale_chall_readability_score(contents))
    readability.append(textstat.difficult_words(contents))
    readability.append(textstat.linsear_write_formula(contents))
    readability.append(textstat.gunning_fog(contents))
    readability.append(textstat.coleman_liau_index(contents))
    readability.append(textstat.text_standard(contents))

    return readability

Example #21

0

Show file

File: medplus.py Project: icdcu/readcomp

def scores_cal_ori(text):

              char_count_value=textstat.char_count(text,ignore_spaces=True)
              lexicon_count_value=textstat.lexicon_count(text,removepunct=True)
              syllable_count_value=textstat.syllable_count(text)
              sentence_count_value=textstat.sentence_count(text)
              avg_sentence_length_value=textstat.avg_sentence_length(text)
              avg_syllables_per_word_value=textstat.avg_syllables_per_word(text)
              avg_letter_per_word_value=textstat.avg_letter_per_word(text)
              avg_sentence_per_word_value=textstat.avg_sentence_per_word(text)
              flesch_kincaid_grade_value=textstat.flesch_kincaid_grade(text)
              smog_index_value=textstat.smog_index(text)
              gunning_fog_value=textstat.gunning_fog(text)
              difficult_words_value=textstat.difficult_words(text)
              dale_chall_value=textstat.dale_chall_readability_score(text)
              polysyllab_value=textstat.polysyllabcount(text)
              return char_count_value,lexicon_count_value,syllable_count_value,sentence_count_value,avg_sentence_length_value,avg_syllables_per_word_value,avg_letter_per_word_value,avg_sentence_per_word_value,flesch_kincaid_grade_value,smog_index_value,gunning_fog_value,difficult_words_value,dale_chall_value,polysyllab_value
              return smog_index_value

Example #22

0

Show file

def analyse_json(json_text):
    # consider moving this to be a feature of Transcript in the other module

    df_witnesses = pd.DataFrame(columns=['html_file_location', 'witness_name',
                                         'syllable_count','lexicon_count',
                                         'sentence_count',
                                         'syllables_per_word',
                                         'gunning_fog', 'smog_index',
                                         'text_standard'],
                      index=[])

    trscrpt = json.loads(json_text)
    if 'witnesses' in trscrpt:
        witnesses = trscrpt['witnesses']


        for s in trscrpt['all_sections']:
            if 'speaker' in s and 'person' in s['speaker'] and \
                    s['speaker']['person']['speaker_type']=='witness':
                witness =  witnesses[s['speaker']['person']['name']]
                witness.setdefault('all_text', []).append(s['spoken_text'])

        for i, p in enumerate(witnesses):
            if 'all_text' in witnesses[p]:
                witness_text = '\n\n'.join(witnesses[p]['all_text'])
                if len(witness_text) > 0:
                    stats_data = {'html_file_location': trscrpt['html_file_location'],
                                  'witness_name': p,
                                  'syllable_count': textstat.syllable_count(witness_text),
                                  'lexicon_count': textstat.lexicon_count(witness_text),
                                  'sentence_count': textstat.sentence_count(witness_text),
                                  'syllables_per_word': textstat.avg_syllables_per_word(witness_text),
                                  'gunning_fog': textstat.gunning_fog(witness_text),
                                  'smog_index': textstat.smog_index(witness_text),
                                  'text_standard': textstat.text_standard(witness_text)}
                    df_witnesses.loc['witness_%i' % i] = stats_data
                else:
                    df_witnesses.loc['witness_%i' % i, 'html_file_location'] = trscrpt['html_file_location']
                    df_witnesses.loc['witness_%i' % i, 'witness_name'] = p
            else:
                df_witnesses.loc['witness_%i' % i, 'html_file_location'] = trscrpt['html_file_location']
                df_witnesses.loc['witness_%i' % i, 'witness_name'] = p

    return df_witnesses

Example #23

0

Show file

File: nltk_functions.py Project: elswob/pure-comp

def run_textstat(text):
    #text = """Playing games has always been thought to be important to the development of well-balanced and creative children; however, what part, if any, they should play in the lives of adults has never been researched that deeply. I believe that playing games is every bit as important for adults as for children. Not only is taking time out to play games with our children and other adults valuable to building interpersonal relationships but is also a wonderful way to release built up tension."""

    ts_flesch_reading_ease = textstat.flesch_reading_ease(text)
    ts_smog_index = textstat.smog_index(text)
    ts_flesch_kincaid_grade = textstat.flesch_kincaid_grade(text)
    ts_coleman_liau_index = textstat.coleman_liau_index(text)
    ts_automated_readability_index = textstat.automated_readability_index(text)
    ts_dale_chall_readability_score = textstat.dale_chall_readability_score(
        text)
    ts_difficult_words = textstat.difficult_words(text)
    ts_linsear_write_formula = textstat.linsear_write_formula(text)
    ts_gunning_fog = textstat.gunning_fog(text)
    ts_text_standard = textstat.text_standard(text)

    return (ts_flesch_reading_ease, ts_smog_index, ts_flesch_kincaid_grade,
            ts_coleman_liau_index, ts_automated_readability_index,
            ts_dale_chall_readability_score, ts_difficult_words,
            ts_linsear_write_formula, ts_gunning_fog, ts_text_standard)

Example #24

0

Show file

File: text_indexes.py Project: BitspleaseSliit/topics-and-complexity

def gunning_fog(text):

    score = textstat.gunning_fog(text)
    level = 0
    if 0 < score < 7:
        level = 1
    elif 7 <= score < 9:
        level = 2
    elif 9 <= score < 12:
        level = 3
    elif 12 <= score < 13:
        level = 4
    elif 13 <= score < 14:
        level = 5
    elif 14 <= score < 15:
        level = 6
    elif 15 <= score:
        level = 7

    return level

Example #25

0

Show file

    def get_feat_readability_metrics(self):
        # https://github.com/shivam5992/textstat

        try:
            test_data = self.webscrap.get_body()
            out = []
            out.append(textstat.flesch_reading_ease(test_data))
            out.append(textstat.smog_index(test_data))
            out.append(textstat.flesch_kincaid_grade(test_data))
            out.append(textstat.coleman_liau_index(test_data))
            out.append(textstat.automated_readability_index(test_data))
            out.append(textstat.dale_chall_readability_score(test_data))
            out.append(textstat.difficult_words(test_data))
            out.append(textstat.linsear_write_formula(test_data))
            out.append(textstat.gunning_fog(test_data))
            #out.append(textstat.text_standard(test_data))
            return out, False

        except Exception as e:
            config.logger.error(repr(e))
            return MISSING_FEATURE * 9, True

Example #26

0

Show file

File: main.py Project: danhanley/readability

def lambda_handler(event, context):

    text = event['text']

    response = {}
    response['flesch_reading_ease'] = textstat.flesch_reading_ease(text)
    response['smog_index'] = textstat.smog_index(text)
    response['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(text)
    response['coleman_liau_index'] = textstat.coleman_liau_index(text)
    response[
        'automated_readability_index'] = textstat.automated_readability_index(
            text)
    response[
        'dale_chall_readability_score'] = textstat.dale_chall_readability_score(
            text)
    response['difficult_words'] = textstat.difficult_words(text)
    response['linsear_write_formula'] = textstat.linsear_write_formula(text)
    response['gunning_fog'] = textstat.gunning_fog(text)
    response['text_standard'] = textstat.text_standard(text)

    return respond(None, response)

Example #27

0

Show file

def analyseText():
    values = request.get_json()
    required = [ 'inputText' ]
    if not all(k in values for k in required):
        return 'Missing values', 400

    text = values['inputText']
    result = {
        'syllable_count': textstat.syllable_count(text),
        'lexicon_count': textstat.lexicon_count(text),
        'sentence_count': textstat.sentence_count(text),
        'flesch_reading_ease': textstat.flesch_reading_ease(text),
        'flesch_kincaid_grade': textstat.flesch_kincaid_grade(text),
        'gunning_fog': textstat.gunning_fog(text),
        'smog_index': textstat.smog_index(text),
        'automated_readability_index': textstat.automated_readability_index(text),
        'coleman_liau_index': textstat.coleman_liau_index(text),
        'linsear_write_formula': textstat.linsear_write_formula(text),
        'dale_chall_readability_score': textstat.dale_chall_readability_score(text)
    };

    return jsonify(result), 200

Example #28

0

Show file

File: read_data.py Project: mvpjox2233/Auto_Score

def feature_readability(essay):
    syllable_count = textstat.syllable_count(essay)
    #音节数统计
    flesch_reading_ease = textstat.flesch_reading_ease(essay)
    #文档的易读性0-100之间的分数
    smog_index = textstat.smog_index(essay)
    #烟雾指数，反映文档的易读程度，更精确，更容易计算
    flesch_kincaid_index = textstat.flesch_kincaid_grade(essay)
    #等级分数，年级等级
    coleman_liau_index = textstat.coleman_liau_index(essay)
    #返回文本的年级级别
    automated_readability_index = textstat.automated_readability_index(essay)
    #自动可读性指数，接近理解文本需要的年级
    dale_chall_readability_score = textstat.dale_chall_readability_score(essay)
    #返回年级级别，使用最常见的英文单词
    difficult_words = textstat.difficult_words(essay)

    linsear_write_formula = textstat.linsear_write_formula(essay)
    #返回文本的年级级别
    gunning_fog = textstat.gunning_fog(essay)
    #迷雾指数， 反映文本的阅读难度
    return syllable_count, flesch_reading_ease, smog_index, flesch_kincaid_index, coleman_liau_index, automated_readability_index, dale_chall_readability_score, difficult_words, linsear_write_formula, gunning_fog

Example #29

0

Show file

File: jobs.py Project: ahmadassaf/beek.it

def calculate_readability_measures(id):
    """ Count the words in doc and update the document. """
    es = elasticsearch.Elasticsearch()
    source = es.get_source(index='beek', doc_type='page', id=id)
    # count = len(source['content'].split())
    try:
        measures = {
            'flesch':
            textstat.flesch_reading_ease(source['content']),
            'smog':
            textstat.smog_index(source['content']),
            'flesch_kincaid':
            textstat.flesch_kincaid_grade(source['content']),
            'coleman_liau':
            textstat.coleman_liau_index(source['content']),
            'readability':
            textstat.automated_readability_index(source['content']),
            'dale_chall':
            textstat.dale_chall_readability_score(source['content']),
            'difficult_words':
            textstat.difficult_words(source['content']),
            'linsear_write_formula':
            textstat.linsear_write_formula(source['content']),
            'gunning_fog':
            textstat.gunning_fog(source['content']),
            'consensus':
            textstat.readability_consensus(source['content']),
        }

        es.update(index='beek',
                  doc_type='page',
                  id=id,
                  body={'doc': {
                      'measures': measures
                  }},
                  refresh=True)
    except Exception as err:
        pass

Example #30

0

Show file

File: jobs.py Project: ahmadassaf/Beek.it

def calculate_readability_measures(id):
    """ Count the words in doc and update the document. """
    es = elasticsearch.Elasticsearch()
    source = es.get_source(index='beek', doc_type='page', id=id)
    # count = len(source['content'].split())
    try:
        measures = {
            'flesch': textstat.flesch_reading_ease(source['content']),
            'smog': textstat.smog_index(source['content']),
            'flesch_kincaid': textstat.flesch_kincaid_grade(source['content']),
            'coleman_liau': textstat.coleman_liau_index(source['content']),
            'readability': textstat.automated_readability_index(source['content']),
            'dale_chall': textstat.dale_chall_readability_score(source['content']),
            'difficult_words': textstat.difficult_words(source['content']),
            'linsear_write_formula': textstat.linsear_write_formula(source['content']),
            'gunning_fog': textstat.gunning_fog(source['content']),
            'consensus': textstat.readability_consensus(source['content']),
        }

        es.update(index='beek', doc_type='page', id=id,
                  body={'doc': {'measures': measures}}, refresh=True)
    except Exception as err:
        pass

Example #31

0

Show file

File: feature2.py Project: saifkhanali9/FakeNewsDetection

def process(data):
    res = np.array([])
    cleaned = data.lower().strip()
    original = data.strip()
    fea1 = numOfWords(cleaned)
    # fea1 = fea1 / 10
    fea2 = numOfChar(cleaned)
    # fea2 = fea2 / 100
    fea3 = count(cleaned, string.punctuation)
    fea5 = numOfContUpperCase(original)
    fea4 = textstat.gunning_fog(data)
    fea6 = textstat.automated_readability_index(data)
    fea7 = textstat.linsear_write_formula(data)
    fea8 = textstat.difficult_words(data)
    fea9 = textstat.dale_chall_readability_score(data)
    fea10 = data.count("\'") + data.count(".") + data.count("\"") + data.count(",") + data.count(
        "’") + data.count("‘") + data.count("”") + data.count("“")
    fea10 = (fea10 / len(data)) * 1000
    fea11 = data.count("1") + data.count("2") + data.count("3") + data.count("4") + data.count(
        "5") + data.count("6") + data.count("7") + data.count("8") + data.count("9") + data.count("0")
    fea12 = data.count("?") + data.count("!") + data.count("@") + data.count("#") + data.count(
        "$") + data.count("%") + data.count("&")
    fea13 = data.count(":") + data.count(";")
    fea14 = data.count("—") + data.count("-") + data.count("_")
    fea15 = (fea10 / len(data)) * 100
    fea16 = data.count("(") + data.count(")") + data.count("[") + data.count("]") + data.count(
        "{") + data.count("}")

    fea17 = data.count("*") + data.count("/")
    fea18 = data.count("?")
    fea19 = fea10 + fea11 + fea12 + fea13 + fea14 + fea15 + fea16 + fea17 + fea18
    res = np.array([[fea1, fea2, fea3, fea5, fea4, fea6, fea7, fea8, fea9, fea10, fea11, fea12, fea13, fea14,
                     fea15, fea16, fea17, fea18, fea19]])


    return res

Example #32

0

Show file

File: predictor.py Project: raphy78626/election_prediction

    def get_readability(self, corpus, type='ari'):
        readability = None
        if type == 'ari':
            readability = textstat.automated_readability_index(corpus)
        elif type == 'flesch':
            readability = textstat.flesch_reading_ease(corpus)
        elif type == 'smog':
            readability = textstat.smog_index(corpus)
        elif type == 'flesch_kinciad':
            readability = textstat.flesch_kincaid_grade(corpus)
        elif type == 'coleman':
            readability = textstat.coleman_liau_index(corpus)
        elif type == 'dale_chall':
            readability = textstat.dale_chall_readability_score(corpus)
        elif type == 'difficult_words':
            readability = textstat.difficult_words(corpus)
        elif type == 'linsear':
            readability = textstat.linsear_write_formula(corpus)
        elif type == 'gunning_fog':
            readability = textstat.gunning_fog(corpus)
        elif type == 'readability_conensus':
            readability = textstat.readability_consensus(corpus)

        return readability

Example #33

0

Show file

 def stats(self, text):
     test_data = text
     stats = {}
     stats['flesch_reading_ease'] = textstat.flesch_reading_ease(test_data)
     stats['smog'] = textstat.smog_index(test_data)
     stats['flesch kincaid'] = textstat.flesch_kincaid_grade(test_data)
     stats['coleman Liau'] = textstat.coleman_liau_index(test_data)
     stats['automated'] = textstat.automated_readability_index(test_data)
     stats['dale chall'] = textstat.dale_chall_readability_score(test_data)
     stats['difficult'] = textstat.difficult_words(test_data)
     stats['linsear'] = textstat.linsear_write_formula(test_data)
     stats['gunning_fog'] = textstat.gunning_fog(test_data)
     stats['standard'] = textstat.text_standard(test_data)
     stats['charcount'] = textstat.char_count(test_data)
     stats['lexicon count'] = textstat.lexicon_count(test_data)
     stats['syllable count'] = textstat.syllable_count(test_data)
     stats['sentence count'] = textstat.sentence_count(test_data)
     stats['avg sentence length'] = textstat.avg_sentence_length(test_data)
     stats['avg_syllables_per_word'] = textstat.avg_syllables_per_word(
         test_data)
     stats['avg_letter_per_word'] = textstat.avg_letter_per_word(test_data)
     stats['avg_sentence_per_word'] = textstat.avg_sentence_per_word(
         test_data)
     return stats

Example #34

0

Show file

File: reading-from-csv.py Project: ashimpaudel/tweet-analysis

	lwf_grades = []
	lwf_total_grade = 0
	# Dale-Chall Readability Score: goo.gl/dvmXmx
	dcr_grades = []
	dcr_total_grade = 0		
	
	num_tweets = 0
	for tweet in cleanest_tweets:
			# skipping tweets which are not just contextbased text. 
			if textstat.sentence_count(tweet) < 1:
				continue
			flesch_kincaid_grade = textstat.flesch_kincaid_grade(tweet)	
			flesch_kincaid_grades.append(flesch_kincaid_grade)
			flesch_kincaid_total_grade += flesch_kincaid_grade

			gunning_fog_grade = textstat.gunning_fog(tweet)	
			gunning_fog_grades.append(gunning_fog_grade)
			gunning_fog_total_grade += gunning_fog_grade

			smog_index_grade = textstat.smog_index(tweet)	
			smog_index_grades.append(smog_index_grade)
			smog_index_total_grade += smog_index_grade

			ar_index_grade = textstat.automated_readability_index(tweet)	
			ar_index_grades.append(ar_index_grade)
			ar_index_total_grade += ar_index_grade
			
			cl_index_grade = textstat.coleman_liau_index(tweet)	
			cl_index_grades.append(cl_index_grade)
			cl_index_total_grade += cl_index_grade

Example #35

0

Show file

File: omahony_smyth.py Project: ankeshanand/review-helpfulness

reviews['scores'] = reviews['helpful'].apply(compute_score)
print reviews['scores'].head(n=10)
y = reviews['scores']
Text = reviews['reviewText']
del reviews

X = np.zeros((len(Text), 4))

for idx, review in enumerate(Text):
    if review == '':
        continue
    try:
        X[idx][0] = ts.flesch_reading_ease(review)
        X[idx][1] = ts.flesch_kincaid_grade(review)
        X[idx][2] = ts.gunning_fog(review)
        X[idx][3] = ts.smog_index(review)
    except Exception as e:
        print review
        print e

X = StandardScaler().fit_transform(X)
print 'Computed X'
print X[0]

model = SVR(verbose=True)
params = {'C': [0.1, 0.5]}
grid = GridSearchCV(model, params, cv=10, scoring='mean_squared_error', n_jobs=-1)
grid.fit(X, y)
print grid.best_score_
print 'RMSE: ' + str(sqrt(abs(grid.best_score_)))

Example #36

0

Show file

#main script
if __name__ == '__main__':

	print "TextStat Comparison Script"
	print "--------------------------"
	
	#read in text from the command line
	#This needs to be fixed to deal/escape special characters
	textToCheck = raw_input("Please enter the text you would like to analyse: ") 
	
	#read in text from a file- but what format?
	
	print "\n\n"
	print "Results"
	print "=============================================="
	print "==============================================\n"
	
	print "Syllable Count: " + str(textstat.syllable_count(textToCheck))
	print "Lexicon Count: " + str(textstat.lexicon_count(textToCheck)) #TRUE is default and removes punctuation before counting
	print "Sentence Count: " + str(textstat.sentence_count(textToCheck))
	print "Flesch Reading Ease formula: " + str(textstat.flesch_reading_ease(textToCheck))
	print "Flesch-Kincaid Grade Level: " + str(textstat.flesch_kincaid_grade(textToCheck))
	print "Fog Scale (Gunning FOG Formula): " + str(textstat.gunning_fog(textToCheck))
	print "SMOG Index: " + str(textstat.smog_index(textToCheck))
	print "Automated Readability Index: " + str(textstat.automated_readability_index(textToCheck))
	print "Coleman-Liau Index: " + str(textstat.coleman_liau_index(textToCheck))
	print "Linsear Write Formula: " + str(textstat.linsear_write_formula(textToCheck))
	print "Dale-Chall Readability Score: " + str(textstat.dale_chall_readability_score(textToCheck))
	print "--------------------------------------------------------------"
	print "Readability Consensus based upon all the above tests: " + str(textstat.text_standard(textToCheck))
	print "\n\n"

Example #37

0

Show file

        coleman_liau_index = textstat.coleman_liau_index(test_data)
        print(coleman_liau_index)
        print("Automated Readability Index (ARI)")
        # print(textstat.automated_readability_index(test_data))
        automated_readability_index = textstat.automated_readability_index(
            test_data)
        print(automated_readability_index)
        # print("Dale-Chall Readability Score")
        # print(textstat.dale_chall_readability_score(test_data))
        print("Linsear Write Formula")
        # print(textstat.linsear_write_formula(test_data))
        linsear_write_formula = textstat.linsear_write_formula(test_data)
        print(linsear_write_formula)
        print("The Fog Scale (Gunning FOG Formula)")
        # print(textstat.gunning_fog(test_data))
        gunning_fog = textstat.gunning_fog(test_data)
        print(gunning_fog)

        print(
            "---------------------------------Summary----------------------------------"
        )
        print("Readability Consensus based upon all the above tests")
        print(
            "Based upon all the above tests, "
            "returns the estimated school grade level required to understand the text."
        )
        # print(textstat.text_standard(test_data))
        school_grade_level = textstat.text_standard(test_data)
        print(school_grade_level)

        # l = [flesch_kincaid_grade, coleman_liau_index, automated_readability_index, linsear_write_formula, gunning_fog]

Example #38

0

Show file

File: tweet_textstat.py Project: elewis442/world_leader_tweet_comparison

def gunning_fog(string):
    result = ts.gunning_fog(string)
    return result

Example #39

0

Show file

File: crawler.py Project: kevinschaich/billboard-top-100-lyrics

                    target.write(lyrics)
                    target.close()

                    # Build Dataset
                    try:
                        cur = {
                            "title": title,
                            "artist": artist,
                            "year": year,
                            "pos": pos,
                            "lyrics": lyrics,
                            "tags": get_tags(artist),
                            "sentiment": sent_analyzer.polarity_scores(lyrics_repl),
                            "f_k_grade": ts.flesch_kincaid_grade(lyrics_repl),
                            "flesch_index": ts.flesch_reading_ease(lyrics_repl),
                            "fog_index": ts.gunning_fog(lyrics_repl),
                            "difficult_words": ts.difficult_words(lyrics_repl),
                            "num_syllables": ts.syllable_count(lyrics_repl),
                            "num_words": ts.lexicon_count(lyrics_repl, True),
                            "num_lines": ts.sentence_count(lyrics_repl),
                            "num_dupes": count_dupes(lyrics)
                        }
                        # print cur
                        dataset.append(cur)
                    except Exception, e:
                        print e

            except Exception, e:
                print "Exception occurred for " + artist + ' - ' + title
                print e

Example #40

0

Show file

    def updateData(self):

        # Full list of polarity scores
        self.polscore = self.sid.polarity_scores(self.text)

        ##### INDEX 0 IN DATA: Text Sentiment #####
        # [INDEX 0] Compounded score (0.0 - 1.0)            [INDEX 1] Negative connotation rating (0.0 - 1.0),
        # [INDEX 2] Positive connotation rating (0.0 - 1.0) [INDEX 3] Neutral connotation rating (0.0 - 1.0)
        self.data.append([
            self.polscore['compound'], self.polscore['neg'],
            self.polscore['pos'], self.polscore['neu']
        ])

        ##### INDEX 1 IN DATA: Sentence Info #####
        # [INDEX 0] Sentence count          [INDEX 1] Average sentence length
        # [INDEX 2] Syllable count          [INDEX 3] Overall word count
        # [INDEX 4] Character count         [INDEX 5] Character count without spaces
        # [INDEX 6] Avg letters per word    [INDEX 7] Avg syllables per word
        self.data.append([
            textstat.sentence_count(self.text),
            textstat.avg_sentence_length(self.text),
            textstat.syllable_count(self.text),
            len(self.splList),
            textstat.char_count(self.text, False),
            textstat.char_count(self.text, True),
            textstat.avg_letter_per_word(self.text),
            textstat.avg_syllables_per_word(self.text)
        ])

        ##### INDEX 2 IN DATA: Flesch Reading Ease #####
        # [INDEX 0] Pure score              [INDEX 1] Approximate grade     [INDEX 2] Normalized (ratio) score
        # SCORE SCALE: 0 - 100
        self.freRaw = textstat.flesch_reading_ease(self.text)
        self.freStat = min(max(self.freRaw, 0), 100)
        self.data.append([
            round(self.freStat, 3),
            self.freGrade(self.freStat),
            round(abs(self.freStat - 100), 2)
        ])

        ##### INDEX 3 IN DATA: Flesch-Kincaid Grade #####
        # [INDEX 0] Pure score              [INDEX 1] Approximate grade     [INDEX 2] Normalized (ratio) score
        # SCORE SCALE: 0 - 18
        self.fkgRaw = textstat.flesch_kincaid_grade(self.text)
        self.fkgStat = self.adjustScore(self.fkgRaw)
        self.data.append([
            round(self.fkgStat, 3),
            self.grade(self.fkgStat),
            round(self.fkgStat / 0.18, 2)
        ])

        ##### INDEX 4 IN DATA: Gunning FOG Index #####
        # [INDEX 0] Pure Score              [INDEX 1] Approximate grade     [INDEX 2] Normalized (ratio) score
        # SCORE SCALE: 0 - 18
        self.fogRaw = textstat.gunning_fog(self.text)
        self.fogStat = self.adjustScore(self.fogRaw)
        self.data.append([
            round(self.fogStat, 3),
            self.grade(self.fogStat),
            round(self.fogStat / 0.18, 2)
        ])

        ##### INDEX 5 IN DATA: SMOG Index #####
        # [INDEX 0] Pure Score              [INDEX 1] Approximate grade     [INDEX 2] Normalized (ratio) score
        # SCORE SCALE: 0 - 18
        self.smogRaw = textstat.smog_index(self.text)
        self.smogStat = self.adjustScore(self.smogRaw)
        self.data.append([
            round(self.smogStat, 3),
            self.grade(self.smogStat),
            round(self.smogStat / 0.18, 2)
        ])

        ##### INDEX 6 IN DATA: Automated Readability Index #####
        # [INDEX 0] Pure Score              [INDEX 1] Approximate grade     [INDEX 2] Normalized (ratio) score
        # SCORE SCALE: 0 - 14
        self.ariRaw = textstat.automated_readability_index(self.text)
        self.ariStat = min(max(self.ariRaw, 0), 14)
        self.data.append([
            round(self.ariStat, 3),
            self.ariGrade(ceil(self.ariStat)),
            round(self.ariStat / 0.14, 2)
        ])  #13

        ##### INDEX 7 IN DATA: Coleman-Liau Index #####
        # [INDEX 0] Pure Score              [INDEX 1] Approximate grade     [INDEX 2] Normalized (ratio) score
        # SCORE SCALE: 0 - 18
        self.cliRaw = textstat.coleman_liau_index(self.text)
        self.cliStat = self.adjustScore(self.cliRaw)
        self.data.append([
            round(self.cliStat, 3),
            self.grade(self.cliStat),
            round(self.cliStat / 0.18, 2)
        ])

        ##### INDEX 8 IN DATA: Linsear Write Index #####
        # [INDEX 0] Pure Score              [INDEX 1] Approximate grade     [INDEX 2] Normalized (ratio) score
        # SCORE SCALE: 0 - 18
        self.lwiRaw = textstat.linsear_write_formula(self.text)
        self.lwiStat = self.adjustScore(self.lwiRaw)
        self.data.append([
            round(self.lwiStat, 3),
            self.grade(self.lwiStat),
            round(self.lwiStat / 0.18, 2)
        ])

        ##### INDEX 9 IN DATA: Dale-Chall Readability Score #####
        # [INDEX 0] Pure Score              [INDEX 1] Approximate grade     [INDEX 2] Normalized (ratio) score
        # SCORE SCALE: 0 - 10
        self.dcrRaw = textstat.dale_chall_readability_score(self.text)
        self.dcrStat = min(max(self.dcrRaw, 0), 10)
        self.data.append([
            round(self.dcrStat, 3),
            self.daleChallGrade(self.dcrStat),
            round(self.dcrStat / 0.1, 2)
        ])

        ##### INDEX 10 IN DATA: Overall Score #####
        # [INDEX 0] Pure Score              [INDEX 1] Approximate grade     [INDEX 2] Normalized (ratio) score
        # SCORE SCALE: 0 - 20
        self.txtRaw = textstat.text_standard(self.text, True)
        self.txtStd = min(max(self.txtRaw, 0), 20)
        self.txtInfo = textstat.text_standard(self.text)
        self.data.append([
            round(self.txtStd, 3),
            self.txtGrade(self.txtStd, self.txtInfo),
            round(self.txtStd / 0.2, 2)
        ])

        return self.data

Example #41

0

Show file

File: analysis.py Project: hughpearse/classic-novels-analysis

#!/bin/python

import sys, string, os
from textstat.textstat import textstat

inputfile = ''
test_data = ""

script_name = sys.argv[0]
inputfile = sys.argv[1]

with open(inputfile) as myfile:
	test_data="".join(line.rstrip() for line in myfile)

var1 = str(textstat.flesch_reading_ease(test_data))
var2 = str(textstat.smog_index(test_data))
var3 = str(textstat.flesch_kincaid_grade(test_data))
var4 = str(textstat.coleman_liau_index(test_data))
var5 = str(textstat.automated_readability_index(test_data))
var6 = str(textstat.dale_chall_readability_score(test_data))
var7 = str(textstat.difficult_words(test_data))
var8 = str(textstat.linsear_write_formula(test_data))
var9 = str(textstat.gunning_fog(test_data))
var10 = str(textstat.readability_consensus(test_data))
var11 = str(textstat.syllable_count(test_data))
var12 = str(textstat.lexicon_count(test_data, 1))
var13 = str(textstat.sentence_count(test_data))

print(var1 + ',' + var2 + ',' + var3 + ',' + var4 + ',' + var5 + ',' + var6 + ',' + var7 + ',' + var8 + ',' + var9 + ',' + var10 + ',' + var11 + ',' + var12 + ',' + var13)