Python automated_readability_index Exemples, textstat.automated_readability_index Python Exemples

Exemple #1

0

Afficher le fichier

def get_stats(text):
    """Collect textstat readability scores plus two raw counts for ``text``.

    Returns a dict with one entry per textstat metric called below, together
    with ``doc_length`` (number of characters in ``text``, whitespace
    included) and ``quote_count`` (number of double-quote characters).
    """
    return {
        "flesch_reading_ease": textstat.flesch_reading_ease(text),
        "smog_index": textstat.smog_index(text),
        "flesch_kincaid_grade": textstat.flesch_kincaid_grade(text),
        "coleman_liau_index": textstat.coleman_liau_index(text),
        "automated_readability_index": textstat.automated_readability_index(text),
        "dale_chall_readability_score": textstat.dale_chall_readability_score(text),
        "difficult_words": textstat.difficult_words(text),
        "linsear_write_formula": textstat.linsear_write_formula(text),
        "gunning_fog": textstat.gunning_fog(text),
        "consolidated_score": textstat.text_standard(text),
        # Character count; spaces are currently included.
        "doc_length": len(text),
        "quote_count": text.count('"'),
    }

Exemple #2

0

Afficher le fichier

Fichier : feature_engineering.py Projet : aktilot/insight2019

def textstat_stats(text):
    """Return a pandas Series of textstat readability features for ``text``.

    The syllable and lexicon counts are divided by the number of
    whitespace-separated tokens in ``text`` and stored under the labels
    ``'syll_count'`` and ``'lex_count'``. When ``text`` contains no tokens
    the two scaled values are reported as 0.0 instead of raising
    ``ZeroDivisionError``.

    SMOG and ``text_standard`` are intentionally not part of the result
    (the original author noted SMOG is only useful on 30+ sentences).
    """
    doc_length = len(text.split())

    flesch_ease = ts.flesch_reading_ease(text)  # Flesch Reading Ease Score
    flesch_grade = ts.flesch_kincaid_grade(text)  # Flesch-Kincaid Grade Level
    gfog = ts.gunning_fog(text)  # FOG index, also indicates grade level
    auto_readability = ts.automated_readability_index(text)
    cl_index = ts.coleman_liau_index(text)
    lw_formula = ts.linsear_write_formula(text)
    dcr_score = ts.dale_chall_readability_score(text)

    syll_count = ts.syllable_count(text, lang='en_US')
    lex_count = ts.lexicon_count(text, removepunct=True)
    if doc_length:
        syll_count_scaled = syll_count / doc_length
        lex_count_scaled = lex_count / doc_length
    else:
        # Empty or whitespace-only input: nothing to scale by.
        syll_count_scaled = 0.0
        lex_count_scaled = 0.0

    idx = ['flesch_ease', 'flesch_grade', 'gfog',
           'auto_readability', 'cl_index', 'lw_formula',
           'dcr_score',
           'syll_count', 'lex_count']
    return pd.Series([flesch_ease, flesch_grade, gfog,
                      auto_readability, cl_index, lw_formula,
                      dcr_score,
                      syll_count_scaled, lex_count_scaled], index=idx)

Exemple #3

0

Afficher le fichier

def test(text):
    """Return the ARI of ``str(text)``, substituting 0.0 when the score is NaN."""
    ari = textstat.automated_readability_index(str(text))
    return 0.0 if math.isnan(ari) else ari

Exemple #4

0

Afficher le fichier

Fichier : DataAnalysis.py Projet : pra8eek/BiopicAnalysis

def getReadabilityMetrics(test_data):
    '''
        Return the readability metrics of an article given as plain text.

        The result is a dict keyed by textstat function name; each value is
        what that textstat function returns for ``test_data``.
        Requires the textstat library.
    '''
    # Every key doubles as the name of the textstat function to call.
    metric_names = (
        "flesch_reading_ease",
        "smog_index",
        "flesch_kincaid_grade",
        "coleman_liau_index",
        "automated_readability_index",
        "dale_chall_readability_score",
        "difficult_words",
        "linsear_write_formula",
        "gunning_fog",
        "text_standard",
    )
    return {name: getattr(textstat, name)(test_data) for name in metric_names}

Exemple #5

0

Afficher le fichier

Fichier : one.py Projet : ddebrup/Wordadora

def fin(words):
    """Score each WordNet-known word with ARI, store it in ``wordle``, call ``jso()``.

    ``wordle`` is a module-level dict that is created on first use. Words for
    which WordNet returns no synsets are skipped. Keys keep the caller's
    original casing; lookups and scoring use the lower-cased word.
    """
    global wordle
    if 'wordle' not in globals():
        wordle = {}
    from nltk.corpus import wordnet
    import textstat

    for candidate in words:
        lowered = candidate.lower()
        if wordnet.synsets(lowered):
            wordle[candidate] = textstat.automated_readability_index(lowered)

    jso()

Exemple #6

0

Afficher le fichier

Fichier : DE_main.py Projet : zzs-NLP/ACS-QG

def get_readibility(text, metric="flesch_kincaid_grade"):
    """
    Score the readability of ``text`` with the textstat function named ``metric``.

    Supported metric names are listed below; for any other value an error
    line is printed and ``None`` is returned.
    Reference: https://chartbeat-labs.github.io/textacy/getting_started/quickstart.html
               https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests
    """
    # Each supported metric name is also the textstat function name.
    supported_metrics = (
        "flesch_kincaid_grade",
        "flesch_reading_ease",
        "smog_index",
        "coleman_liau_index",
        "automated_readability_index",
        "dale_chall_readability_score",
        "difficult_words",
        "linsear_write_formula",
        "gunning_fog",
        "text_standard",
    )
    if metric in supported_metrics:
        return getattr(textstat, metric)(text)

    print("ERROR: Please select correct metric!")
    return None

Exemple #7

0

Afficher le fichier

Fichier : comparison_metrics.py Projet : stevenjmorgan/CAPS

def read_metrics(text_clean):
    """Return a dict of readability scores from textstat and quanteda.

    Four scores come from textstat. The remaining entries are read by
    position from the result of ``quanteda.textstat_readability`` and
    converted to float through ``r_repr()``.
    """
    table = {
        'fog': textstat.gunning_fog(text_clean),
        'smog': textstat.smog_index(text_clean),
        'ari': textstat.automated_readability_index(text_clean),
        'coleman_liau': textstat.coleman_liau_index(text_clean),
    }

    r_read_mets = quanteda.textstat_readability(text_clean, measure='all')

    # (output key, position in the quanteda result).
    # NOTE(review): the positions are hard-coded; presumably they match the
    # column order of the installed quanteda version - confirm before upgrading.
    r_positions = (
        ('ari_r', 1),
        ('rix_r', 35),
        ('Coleman_Liau_Grade_R', 9),
        ('Coleman_Liau_Short_R', 10),
        ('Danielson_Bryan_R', 14),
        ('Dickes_Steiwer_R', 16),
        ('ELF_R', 18),
        ('Farr_Jenkins_Paterson_R', 19),
        ('flesch_R', 20),
        ('flesh_kincaid_R', 22),
        ('FORCAST_R', 26),
        ('Fucks_R', 28),
        ('FOG_R', 23),
        ('Linsear_Write_R', 29),
        ('nWS_R', 31),
        ('SMOG_R', 37),
        ('Strain_R', 43),
        ('Wheeler_Smith_R', 46),
    )
    for key, position in r_positions:
        table[key] = float(r_read_mets[position].r_repr())

    return table

Exemple #8

0

Afficher le fichier

def seven_test(processed_essay):
    """
    Compute readability scores for every essay, using predefined formulas.

    Each returned list starts with a short label ("FS", "GI", "KG", "LI",
    "ARI", "DLS", "DW", "LW") followed by one score per item of
    ``processed_essay``; every item is converted with ``str()`` before scoring.
    :param processed_essay: iterable of essays
    :return: flesch_score, gunning_index, kincaid_grade, liau_index,
        automated_readability_index, dale_readability_score, difficult_word,
        linsear_write
    """
    flesch_score = ["FS"]
    gunning_index = ["GI"]
    kincaid_grade = ["KG"]
    liau_index = ["LI"]
    automated_readability_index = ["ARI"]
    dale_readability_score = ["DLS"]
    difficult_word = ["DW"]
    linsear_write = ["LW"]

    # Pair every output column with the textstat function that fills it.
    columns = (
        (flesch_score, textstat.flesch_reading_ease),
        (gunning_index, textstat.gunning_fog),
        (kincaid_grade, textstat.flesch_kincaid_grade),
        (liau_index, textstat.coleman_liau_index),
        (automated_readability_index, textstat.automated_readability_index),
        (dale_readability_score, textstat.dale_chall_readability_score),
        (difficult_word, textstat.difficult_words),
        (linsear_write, textstat.linsear_write_formula),
    )
    for essay in processed_essay:
        essay_text = str(essay)
        for column, scorer in columns:
            column.append(scorer(essay_text))

    return (flesch_score, gunning_index, kincaid_grade, liau_index,
            automated_readability_index, dale_readability_score,
            difficult_word, linsear_write)

Exemple #9

0

Afficher le fichier

Fichier : Team_Three_LSTM.py Projet : manishagupta15/Automated-Essay-Scoring-System

def feature_getter(text):
    """Build a small list of readability features for ``text``.

    ``text`` may be ``str`` or UTF-8 encoded bytes. Runs of non-ASCII
    characters are replaced by a single space before any scoring.

    Returns a list ``[sentence count, syllable count, word count,
    Flesch reading ease, automated readability index]``.
    """
    # Decode explicitly instead of try/except-pass. Undecodable bytes become
    # U+FFFD, which the ASCII filter below turns into a space.
    if isinstance(text, (bytes, bytearray)):
        text = text.decode('utf-8', errors='replace')
    text = re.sub(r'[^\x00-\x7F]+', ' ', text)

    sentences = nltk.sent_tokenize(text)

    syllable_count = textstat.syllable_count(text, lang='en_US')
    word_count = textstat.lexicon_count(text, removepunct=True)
    flesch = textstat.flesch_reading_ease(text)
    readability = textstat.automated_readability_index(text)

    return [
        len(sentences),   # number of sentences
        syllable_count,   # number of syllables
        word_count,       # number of words (punctuation removed)
        flesch,           # Flesch reading ease
        readability,      # automated readability index
    ]

Exemple #10

0

Afficher le fichier

Fichier : eval_generated_text.py Projet : YusukeSuzuki1213/text-readability-eval

def create_csv(config):
    """Score generated sentences with ARI and write them, sorted, to a CSV.

    Reads ``CsvPath``, ``ResultPath`` and ``CsvName`` from the ``Paths``
    section of ``config``. ``CsvName`` is expected to look like
    ``'<word1>_<word2>.csv'``. For every input row, column 2 is split on
    newlines into sentences; columns 3-5 are carried over unchanged. Each
    non-empty sentence becomes one output row, and the rows are written to
    ``ResultPath`` ordered by ascending ARI.
    """
    csv_path = config.get('Paths','CsvPath')
    result_path = config.get('Paths','ResultPath')
    csv_name = config.get('Paths', 'CsvName')
    # One tuple per sentence: (input csv row number, word1, word2, FN,
    # word1 FE, word2 FE, ARI of the sentence, sentence, translation placeholder)
    result_list = []
    csv_title = ("読み込んだcsvの行番号", "word1", "word2", "FN", "word1 FE","word2 FE","生成された文のARI", "生成された文", "生成された文の日本語訳")
    words = csv_name.split("_")  # e.g. ['water', 'pen.csv']
    word1 = words[0]  # 'water'
    word2 = words[1].split(".")[0]  # 'pen'

    with open(csv_path, 'r') as f:
        for i, row in enumerate(csv.reader(f)):
            fn = row[3]
            word1_fe = row[4]
            word2_fe = row[5]
            for sentence in row[2].split('\n'):
                if not sentence:
                    continue
                # The last field is a fixed placeholder where a Japanese
                # translation of the sentence would otherwise go.
                result_list.append(
                    (i, word1, word2, fn, word1_fe, word2_fe,
                     textstat.automated_readability_index(sentence),
                     sentence, "リクエスト制限")
                )
            print(i)

    # NOTE(review): dropping the first entry presumably removes the one
    # produced by the input header row - confirm against the input format.
    # Guarded so that an input yielding no sentences no longer raises IndexError.
    if result_list:
        del result_list[0]
    result_list.sort(key=lambda tup: tup[6])  # sort by ARI

    # newline='' is what the csv module requires for files handed to a writer.
    with open(result_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(csv_title)
        writer.writerows(result_list)

Exemple #11

0

Afficher le fichier

def readability(queries):
    """Score every line in ``queries`` with ten textstat metrics.

    Returns a dict mapping a column label to the list of scores, one per
    line and in input order. ``'Text Standard'`` uses ``float_output=True``.
    """
    # The original built a pandas DataFrame here and immediately replaced it
    # with this dict; only the dict was ever used or returned.
    scores = {
        'Flesch': [],
        'Smog': [],
        'Flesch grade': [],
        'Coleman': [],
        'Automated': [],
        'Dale': [],
        'Difficult': [],
        'Linsear': [],
        'Gunning': [],
        'Text Standard': []
    }
    for line in queries:
        scores['Flesch'].append(textstat.flesch_reading_ease(line))
        scores['Smog'].append(textstat.smog_index(line))
        scores['Flesch grade'].append(textstat.flesch_kincaid_grade(line))
        scores['Coleman'].append(textstat.coleman_liau_index(line))
        scores['Automated'].append(textstat.automated_readability_index(line))
        scores['Dale'].append(textstat.dale_chall_readability_score(line))
        scores['Difficult'].append(textstat.difficult_words(line))
        scores['Linsear'].append(textstat.linsear_write_formula(line))
        scores['Gunning'].append(textstat.gunning_fog(line))
        scores['Text Standard'].append(
            textstat.text_standard(line, float_output=True))

    return scores

Exemple #12

0

Afficher le fichier

def analyze():
    """Score the UTF-8 request body with textstat and return it as JSON.

    The body of the module-level ``request`` is decoded and stripped, scored
    with each metric below, passed through ``jsonify`` and then
    ``decorate_response``.
    """
    print(request)
    str_to_read = request.data.decode("utf-8").strip()

    # Report keys are the textstat function names with '-' instead of '_'.
    report_keys = (
        "flesch-reading-ease",
        "smog-index",
        "flesch-kincaid-grade",
        "coleman-liau-index",
        "automated-readability-index",
        "dale-chall-readability-score",
        "difficult-words",
        "linsear-write-formula",
        "gunning-fog",
        "text-standard",
    )
    report = {
        key: getattr(textstat, key.replace("-", "_"))(str_to_read)
        for key in report_keys
    }
    return decorate_response(jsonify(report))

Exemple #13

0

Afficher le fichier

def get_readability_score(text, metric="flesch"):
    """Return one readability score for ``text``.

    ``metric`` selects the score:

    * ``"flesch"``, ``"smog"``, ``"coleman_liau_index"``,
      ``"automated_readability_index"``, ``"dale_chall_readability_score"``,
      ``"linsear_write_formula"``, ``"gunning_fog"``: the matching textstat
      function (https://pypi.org/project/textstat/).
    * ``"difficult_words"``: percentage of distinct lower-cased tokens that
      are at least 6 characters long and not in ``EASY_WORDS``; 0.0 when
      the text has no tokens.
    * ``"avg_word_length"``: mean length of the tokens that are not in
      ``misc_utils.PUNCT``; 0 when there are none.

    Any other ``metric`` returns ``None``. Typographic apostrophes are
    normalised to ``'`` before scoring.
    """
    text = text.replace("’", "'")

    if metric == "flesch":
        return textstat.flesch_reading_ease(text)
    if metric == "smog":
        return textstat.smog_index(text)
    if metric == "coleman_liau_index":
        return textstat.coleman_liau_index(text)
    if metric == "automated_readability_index":
        return textstat.automated_readability_index(text)
    if metric == "dale_chall_readability_score":
        return textstat.dale_chall_readability_score(text)
    if metric == "difficult_words":
        tokens = set(tknzr.tokenize(text.lower()))
        if not tokens:
            # No tokens at all: avoid dividing by zero.
            return 0.0
        nb_difficult = sum(
            1 for w in tokens if w not in EASY_WORDS and len(w) >= 6)
        return 100 * nb_difficult / len(tokens)
    if metric == "linsear_write_formula":
        return textstat.linsear_write_formula(text)
    if metric == "gunning_fog":
        return textstat.gunning_fog(text)
    if metric == "avg_word_length":
        words = [w for w in tknzr.tokenize(text) if w not in misc_utils.PUNCT]
        if not words:
            return 0
        return np.average([len(w) for w in words])
    return None

Exemple #14

0

Afficher le fichier

Fichier : text_ratings.py Projet : fergmack/prod_readability

def score(text):
    """Return six textstat scores for ``text`` as a tuple.

    Order: Flesch reading ease, Flesch-Kincaid grade, Gunning fog,
    SMOG index, Coleman-Liau index, automated readability index.
    """
    return (
        textstat.flesch_reading_ease(text),
        textstat.flesch_kincaid_grade(text),
        textstat.gunning_fog(text),
        textstat.smog_index(text),
        textstat.coleman_liau_index(text),
        textstat.automated_readability_index(text),
    )

Exemple #15

0

Afficher le fichier

 def getReadability(df):
     """Add 'ARI', 'DCR' and 'TS' columns computed from ``df.headline_text``.

     The frame is modified in place and also returned. 'TS' is textstat's
     ``text_standard`` with ``float_output=True``.
     """
     import textstat

     # Column name -> function applied to every headline.
     column_scorers = {
         'ARI': textstat.automated_readability_index,
         'DCR': textstat.dale_chall_readability_score,
         'TS': lambda headline: textstat.text_standard(
             headline, float_output=True),
     }
     for column, scorer in column_scorers.items():
         df[column] = df.headline_text.apply(scorer)
     return df

Exemple #16

0

Afficher le fichier

Fichier : 2_textual_variables_1993-2005.py Projet : zr-git/nn-scrapy

def do_datas():
    """Append the text statistics of the current document to the result lists.

    Reads the module-level ``words``, ``vocab`` and ``contents`` and appends
    one value to each module-level accumulator list: word count, vocabulary
    size, syllable count, sentence count, tone counts for the LM, General
    Inquirer and Henry word lists, and six readability scores.
    """
    ########### Save text statistics
    ##### 1. nw 2. nvocab 3. nsyllable 4. nsentence 5. tone 6. readability
    ## 1. nw
    nw.append(len(words))
    ## 2. nvocab
    nvocab.append(len(vocab))
    ## 3. syllable
    nsyllable.append(textstat.syllable_count(contents))
    ## 4. sentence
    nsentence.append(textstat.sentence_count(contents))
    ## 5. tone
    ### LM dictionary
    n_neg_lm.append(count_occurrence(words, lm_neg))
    n_pos_lm.append(count_occurrence(words, lm_pos))
    n_uctt_lm.append(count_occurrence(words, lm_uctt))
    n_lit_lm.append(count_occurrence(words, lm_lit))
    n_cstr_lm.append(count_occurrence(words, lm_cstr))
    n_modal1_lm.append(count_occurrence(words, lm_modal1))
    n_modal2_lm.append(count_occurrence(words, lm_modal2))
    n_modal3_lm.append(count_occurrence(words, lm_modal3))
    n_negation_lm.append(count_negation(words, lm_pos, gt_negation))
    ### General Inquirer dictionary
    n_neg_gi.append(count_occurrence(words, gi_neg))
    n_pos_gi.append(count_occurrence(words, gi_pos))
    n_negation_gi.append(count_negation(words, gi_pos, gt_negation))
    ### Henry dictionary
    n_neg_hr.append(count_occurrence(words, hr_neg))
    n_pos_hr.append(count_occurrence(words, hr_pos))
    # Fixed copy-paste slip: the Henry negation count previously used the
    # General Inquirer positive list (gi_pos) instead of hr_pos.
    n_negation_hr.append(count_negation(words, hr_pos, gt_negation))
    ## 6. readability
    # Flesch reading ease: capped at 100; negative values are stored as NaN.
    fre_i = textstat.flesch_reading_ease(contents)
    if fre_i > 100:
        fre_i = 100
    if fre_i < 0:
        fre_i = float('NaN')
    fre.append(fre_i)
    # Flesch-Kincaid grade: negative values are stored as NaN.
    fkg_i = textstat.flesch_kincaid_grade(contents)
    if fkg_i < 0:
        fkg_i = float('NaN')
    fkg.append(fkg_i)
    # Coleman-Liau index: negative values are stored as NaN.
    cl_i = textstat.coleman_liau_index(contents)
    if cl_i < 0:
        cl_i = float('NaN')
    cl.append(cl_i)
    # The remaining scores are stored as returned by textstat.
    fog.append(textstat.gunning_fog(contents))
    ari.append(textstat.automated_readability_index(contents))
    smog.append(textstat.smog_index(contents))

Exemple #17

0

Afficher le fichier

Fichier : textstat_readability.py Projet : MipedD/521158S-NLP-project

def ari_for_col(a_data, a_col):
    """Add an ``"ari"`` column holding the ARI of every value in ``a_data[a_col]``.

    Values that textstat cannot score get the sentinel ``-1``. ``a_data`` is
    modified in place and also returned.
    """
    ari_col = []
    for review in a_data[a_col]:
        try:
            ari = textstat.automated_readability_index(review)
        except Exception:
            # Best effort: keep going with the sentinel. Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit through.
            ari = -1
        ari_col.append(ari)
    a_data["ari"] = ari_col
    return a_data

Exemple #18

0

Afficher le fichier

    def score(self, strText):
        """Compute textstat scores for ``strText`` and store them on ``self``.

        Every numeric score is stored under the name of its textstat
        function. Grade-like scores also get a ``str_*`` label produced by
        ``self.grade``; the Dale-Chall score and the Flesch reading ease get
        a ``str_*`` label from the threshold tables below.
        """
        # (inclusive lower bound, label), checked from highest to lowest.
        dale_chall_bands = (
            (9.0, '13th to 15th grade (college)'),
            (8.0, '11th to 12th grade'),
            (7.0, '9th to 10th grade'),
            (6.0, '7th to 8th grade'),
            (5.0, '5th to 6th grade'),
        )
        reading_ease_bands = (
            (90, 'Very Easy'),
            (80, 'Easy'),
            (70, 'Fairly Easy'),
            (60, 'Standard'),
            (50, 'Fairly Difficult'),
            (30, 'Difficult'),
        )

        ari = textstat.automated_readability_index(strText)
        self.automated_readability_index = ari
        self.str_automated_readability_index = self.grade(ari)

        self.coleman_liau_index = textstat.coleman_liau_index(strText)
        self.str_coleman_liau_index = self.grade(self.coleman_liau_index)

        dale_chall = textstat.dale_chall_readability_score(strText)
        self.dale_chall_readability_score = dale_chall
        for lower_bound, label in dale_chall_bands:
            if dale_chall >= lower_bound:
                break
        else:
            label = '4th grade or lower'
        self.str_dale_chall_readability_score = ' | ' + label

        self.difficult_words = textstat.difficult_words(strText)

        self.flesch_kincaid_grade = textstat.flesch_kincaid_grade(strText)
        self.str_flesch_kincaid_grade = self.grade(self.flesch_kincaid_grade)

        reading_ease = textstat.flesch_reading_ease(strText)
        self.flesch_reading_ease = reading_ease
        for lower_bound, label in reading_ease_bands:
            if reading_ease >= lower_bound:
                break
        else:
            label = 'Very Confusing'
        self.str_flesch_reading_ease = ' | ' + label

        self.gunning_fog = textstat.gunning_fog(strText)
        self.str_gunning_fog = self.grade(self.gunning_fog)

        self.linsear_write_formula = textstat.linsear_write_formula(strText)
        self.str_linsear_write_formula = self.grade(self.linsear_write_formula)

        self.smog_index = textstat.smog_index(strText)
        self.str_smog_index = self.grade(self.smog_index)

        self.text_standard = textstat.text_standard(strText)

Exemple #19

0

Afficher le fichier

Fichier : text_utils.py Projet : titipata/scipdf_parser

def compute_readability_stats(text):
    """
    Compute reading statistics of the given text
    Reference: https://github.com/shivam5992/textstat

    Parameters
    ==========
    text: str, input section or abstract text

    Returns
    =======
    dict mapping each statistic name to its textstat value. If any of the
    computations raises, every value in the dict is None.
    """
    # (output key, name of the textstat function that produces it)
    metric_functions = (
        ('flesch_reading_ease', 'flesch_reading_ease'),
        ('smog', 'smog_index'),
        ('flesch_kincaid_grade', 'flesch_kincaid_grade'),
        ('coleman_liau_index', 'coleman_liau_index'),
        ('automated_readability_index', 'automated_readability_index'),
        ('dale_chall', 'dale_chall_readability_score'),
        ('difficult_words', 'difficult_words'),
        ('linsear_write', 'linsear_write_formula'),
        ('gunning_fog', 'gunning_fog'),
        ('text_standard', 'text_standard'),
        ('n_syllable', 'syllable_count'),
        ('avg_letter_per_word', 'avg_letter_per_word'),
        ('avg_sentence_length', 'avg_sentence_length'),
    )
    try:
        readability_dict = {
            key: getattr(textstat, function_name)(text)
            for key, function_name in metric_functions
        }
    except Exception:
        # All-or-nothing fallback, as before. ``except Exception`` replaces
        # the bare ``except:`` so KeyboardInterrupt/SystemExit still propagate.
        readability_dict = {key: None for key, _ in metric_functions}
    return readability_dict

Exemple #20

0

Afficher le fichier

Fichier : a2_NLP.py Projet : jeffjohannsen/Predicting-Yelp-Review-Quality

 def create_readability_features(self):
     """
     Add readability columns to ``self.X_train`` and ``self.X_test``.

     Both columns are computed from "review_text" with the textstat library:
     the Flesch-Kincaid grade and the Automated Readability Index (ari).
     """
     # (new column, textstat function applied to every review text)
     column_scorers = (
         ("review_text_readability_flesch_kincaid",
          textstat.flesch_kincaid_grade),
         ("review_text_ari", textstat.automated_readability_index),
     )
     for frame in (self.X_train, self.X_test):
         for column, scorer in column_scorers:
             frame[column] = frame["review_text"].apply(scorer)

Exemple #21

0

Afficher le fichier

Fichier : readability.py Projet : FlyingTwigs/Word_Statistics

 def generate_score(self, text):
     """Compute readability grades for ``text`` and store them on ``self``.

     Flesch reading ease, Flesch-Kincaid and Dale-Chall each also get a
     ``*_consensus`` attribute from ``readability_test_consensus``. SMOG,
     ARI and Coleman-Liau are stored as raw scores only.
     """
     reading_ease = ts.flesch_reading_ease(text)
     self.flesch_reading_grade = reading_ease
     self.flesch_reading_grade_consensus = readability_test_consensus(
         reading_ease, flesch_ease_grading_system)

     kincaid = ts.flesch_kincaid_grade(text)
     self.flesch_kincaid_grade = kincaid
     self.flesch_kincaid_grade_consensus = readability_test_consensus(
         kincaid, us_grade_level_system_age)

     dale_chall = ts.dale_chall_readability_score(text)
     self.dale_chall_grade = dale_chall
     self.dale_chall_grade_consensus = readability_test_consensus(
         dale_chall, dale_chall_system)

     self.smog_grade = ts.smog_index(text)
     # An ARI consensus (against us_grade_level_system_level) is not computed.
     self.ari_grade = ts.automated_readability_index(text)
     self.coleman_liau_grade = ts.coleman_liau_index(text)

Exemple #22

0

Afficher le fichier

    def process(self, df):
        """Add readability feature columns to ``df`` and pickle them.

        All features are computed from ``df['articleBody']``: nine textstat
        scores, the count of the tokens 'i'/'me'/'myself', the count of the
        punctuation tokens ',', '.', '!', '?', and lexical diversity
        (distinct tokens / total tokens, 0.0 for a body with no tokens).

        The feature names and then the feature matrix are dumped, in that
        order, to '../saved_data/read.pkl' with the highest pickle protocol.

        Returns 1.
        """
        t0 = time()
        print("\n---Generating Readability Features:---\n")

        def lexical_diversity(text):
            # Share of distinct tokens among all lower-cased tokens.
            words = nltk.tokenize.word_tokenize(text.lower())
            if not words:
                # Empty article body: avoid dividing by zero.
                return 0.0
            return len(set(words)) / len(words)

        def get_counts(text, word_list):
            # Number of lower-cased tokens that appear in word_list.
            words = nltk.tokenize.word_tokenize(text.lower())
            return sum(1 for word in words if word in word_list)

        # Each column is named after the textstat function that fills it.
        textstat_feats = [
            'flesch_reading_ease', 'smog_index', 'flesch_kincaid_grade',
            'coleman_liau_index', 'automated_readability_index',
            'dale_chall_readability_score', 'difficult_words',
            'linsear_write_formula', 'gunning_fog',
        ]
        for name in textstat_feats:
            df[name] = df['articleBody'].map(getattr(textstat, name))

        df['i_me_myself'] = df['articleBody'].apply(get_counts, args=(['i', 'me', 'myself'],))
        df['punct'] = df['articleBody'].apply(get_counts, args=([',', '.', '!', '?'],))
        df['lexical_diversity'] = df['articleBody'].apply(lexical_diversity)

        feats = textstat_feats + ['i_me_myself', 'punct', 'lexical_diversity']

        outfilename_xReadable = df[feats].values

        with open('../saved_data/read.pkl', 'wb') as outfile:
            pickle.dump(feats, outfile, -1)
            pickle.dump(outfilename_xReadable, outfile, -1)

        print ('readable features saved in read.pkl')

        print('\n---Readability Features is complete---')
        print("Time taken {} seconds\n".format(time() - t0))

        return 1

Exemple #23

0

Afficher le fichier

Fichier : complexity.py Projet : ghoulmann/more_eyes

 def readability_scores(self, text):
     """Store textstat readability scores for ``text`` as attributes of ``self``.

     ``self.ari`` holds the automated readability index; every other
     attribute is named after the textstat function that produced it.
     """
     self.ari = textstat.automated_readability_index(text)
     # Attribute name == textstat function name for the remaining scores.
     same_named_metrics = (
         'flesch_kincaid_grade',
         'coleman_liau_index',
         'dale_chall_readability_score',
         'flesch_reading_ease',
         'gunning_fog',
         'linsear_write_formula',
         'lix',
         'rix',
         'smog_index',
         'text_standard',
     )
     for metric in same_named_metrics:
         setattr(self, metric, getattr(textstat, metric)(text))

Exemple #24

0

Afficher le fichier

def get_readability_stats(text):
    """Return a dict of textstat readability scores for ``text``.

    Keys are the textstat function names. ``text_standard`` is requested
    with ``float_output=True``.
    """
    plain_metrics = (
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'linsear_write_formula',
        'gunning_fog',
    )
    stats = {name: getattr(textstat, name)(text) for name in plain_metrics}
    stats['text_standard'] = textstat.text_standard(text, float_output=True)
    return stats

Exemple #25

0

Afficher le fichier

Fichier : textstat_readability.py Projet : MipedD/521158S-NLP-project

def simple_example_ari():
    """Print the automated readability index (ARI) of a fixed sample paragraph."""
    sample_paragraph = (
        "Playing games has always been thought to be important to "
        "the development of well-balanced and creative children; "
        "however, what part, if any, they should play in the lives "
        "of adults has never been researched that deeply. I believe "
        "that playing games is every bit as important for adults "
        "as for children. Not only is taking time out to play games "
        "with our children and other adults valuable to building "
        "interpersonal relationships but is also a wonderful way "
        "to release built up tension."
    )
    print("Calculating automated readability index (ARI)")
    ari = textstat.automated_readability_index(sample_paragraph)
    print("ARI:", ari)

Exemple #26

0

Afficher le fichier

Fichier : vocabulary.py Projet : xkrieg/hyogen_helper

def vocab_check(text):
    """Return ``(scores, diff_words, easy_word_dict)`` for ``text``.

    ``scores`` is a dict of the readability and vocabulary measures below.
    The other two values are the pair returned by ``difficult_words(text)``.
    """
    scores = {
        'dale_chall_readability_score': dale_chall_readability_score(text),
        'smog_index': smog_index(text),
        'gunning_fog': gunning_fog(text),
        'flesch_reading_ease': flesch_reading_ease(text),
        'flesch_kincaid_grade': flesch_kincaid_grade(text),
        'linsear_write_formula': linsear_write_formula(text),
        'coleman_liau_index': coleman_liau_index(text),
        'automated_readability_index': automated_readability_index(text),
        'yule_vocab_richness': yule(text),
        'total_score': text_standard(text, float_output=True),
    }

    hard_words, easy_words = difficult_words(text)

    return scores, hard_words, easy_words

Exemple #27

0

Afficher le fichier

Fichier : mongo.py Projet : abir-chakroun/Machine-learning-project

def lisibilty(text):
    """Return a list of textstat readability features for ``str(text)``.

    Order: syllable count, lexicon count (punctuation removed), sentence
    count, Flesch reading ease, Flesch-Kincaid grade, Gunning fog, SMOG
    index, automated readability index, Coleman-Liau index, Linsear Write
    formula, Dale-Chall readability score.
    """
    as_text = str(text)
    # NOTE(review): 'en_arabic' does not look like a standard locale code -
    # confirm what the installed textstat version does with it.
    return [
        textstat.syllable_count(as_text, lang='en_arabic'),
        textstat.lexicon_count(as_text, removepunct=True),
        textstat.sentence_count(as_text),
        textstat.flesch_reading_ease(as_text),
        textstat.flesch_kincaid_grade(as_text),
        textstat.gunning_fog(as_text),
        textstat.smog_index(as_text),
        textstat.automated_readability_index(as_text),
        textstat.coleman_liau_index(as_text),
        textstat.linsear_write_formula(as_text),
        textstat.dale_chall_readability_score(as_text),
    ]

Exemple #28

0

Afficher le fichier

Fichier : trollreddit.py Projet : LewkowskiArkadiusz/artykul

def calculate_ari(dataframe):
    """Add an 'ARI_value' column to ``dataframe`` and save it as 'test.csv'.

    For every value in ``dataframe['name']`` the matching ``body`` rows are
    read from the ``May2015`` table through the module-level ``sql_conn``,
    concatenated, and scored with the automated readability index.
    ``dataframe`` is modified in place; nothing is returned.
    """
    df = dataframe
    ari_values = []

    # The name is passed as a bound parameter instead of being formatted
    # into the SQL text, so quotes in a name cannot break the query.
    # NOTE(review): '?' is the qmark placeholder style (e.g. sqlite3) -
    # confirm it matches the driver behind sql_conn.
    query = """
        SELECT body FROM May2015
        WHERE name == ?
        """
    for name in df['name']:
        df_count = pd.read_sql(query, sql_conn, params=(name,))
        # NOTE(review): bodies are joined without a separator, so the last
        # word of one body fuses with the first word of the next - confirm
        # this is intended.
        tmp_str = ''.join(df_count['body'])
        ari_values.append(textstat.automated_readability_index(tmp_str))

    df['ARI_value'] = ari_values
    df.to_csv('test.csv', encoding='utf-8')

Exemple #29

0

Afficher le fichier

Fichier : scheduler.py Projet : Intellinewz/intellinews_api

def analyze_vocab(text):
    """Return a dict of vocabulary and readability statistics for ``text``.

    ``num_words`` is textstat's lexicon count. The other keys are the names
    of the textstat functions that produced them, and ``text_standard`` is
    requested with ``float_output=True``.
    """
    same_named_metrics = (
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'difficult_words',
        'linsear_write_formula',
        'gunning_fog',
    )
    report = {'num_words': textstat.lexicon_count(text)}
    for metric in same_named_metrics:
        report[metric] = getattr(textstat, metric)(text)
    report['text_standard'] = textstat.text_standard(text, float_output=True)
    return report

Exemple #30

0

Afficher le fichier

Fichier : readability_calc.py Projet : ocbier/cpsc503_final_project

def calcReadabilityScores(content, basename, stats=None, outFile=""):
    """Append the positive readability scores of ``content`` to ``stats``.

    Parameters:
        content: text to score.
        basename: label stored as the first element of every appended row.
        stats: list to extend with ``[basename, metric, score]`` rows. A new
            list is created when omitted, instead of sharing one mutable
            default list across calls.
        outFile: unused; kept for backward compatibility.

    Returns:
        The ``stats`` list, so the rows are reachable even when no list was
        passed in.
    """
    if stats is None:
        stats = []

    scores = {
        "flesch_reading_ease":
        textstat.flesch_reading_ease(content),
        "gunning_fog":
        textstat.gunning_fog(content),
        "automated_readability_index":
        textstat.automated_readability_index(content),
        "coleman_liau_index":
        textstat.coleman_liau_index(content)
    }

    for metric, value in scores.items():
        # Only positive scores are kept; a score of 0 is treated as an error.
        if value > 0.0:
            stats.append([basename, metric, value])

    return stats

Exemple #31

0

Afficher le fichier

Fichier : textstat.py Projet : suraj-swaroop/Wikiplugin

def textstat_stats(text):
    """Return eight textstat readability scores for ``text`` as a pandas Series.

    Index labels: 'difficulty' (Flesch reading ease), 'grade_difficulty'
    (Flesch-Kincaid grade), 'gfog', 'smog', 'ari', 'cli', 'lwf', 'dcrs'.
    """
    labelled_scores = {
        'difficulty': textstat.flesch_reading_ease(text),
        'grade_difficulty': textstat.flesch_kincaid_grade(text),
        'gfog': textstat.gunning_fog(text),
        'smog': textstat.smog_index(text),
        'ari': textstat.automated_readability_index(text),
        'cli': textstat.coleman_liau_index(text),
        'lwf': textstat.linsear_write_formula(text),
        'dcrs': textstat.dale_chall_readability_score(text),
    }
    return pd.Series(
        list(labelled_scores.values()), index=list(labelled_scores.keys()))

Exemple #32

0

Afficher le fichier

Fichier : test.py Projet : shivam5992/textstat

def test_automated_readability_index():
    """Check that the ARI of the module-level ``long_test`` sample is 12.3."""
    assert textstat.automated_readability_index(long_test) == 12.3