def lex_readability(self, text, mode='fre'):
    if mode == 'all':
        fre_score = textstat.flesch_reading_ease(text)
        fog_index = textstat.gunning_fog(text)
        fkg_index = textstat.flesch_kincaid_grade(text)
        dcr_score = textstat.dale_chall_readability_score(text)
        text_standard = textstat.text_standard(text, float_output=True)
        return fre_score, fog_index, fkg_index, dcr_score, text_standard
    if mode == 'fre':
        fre_score = textstat.flesch_reading_ease(text)
        return fre_score
    if mode == 'fog':
        fog_index = textstat.gunning_fog(text)
        return fog_index
    if mode == 'fkg':
        fkg_index = textstat.flesch_kincaid_grade(text)
        return fkg_index
    if mode == 'dcr':
        dcr_score = textstat.dale_chall_readability_score(text)
        return dcr_score
    if mode == 'text_std':
        text_standard = textstat.text_standard(text, float_output=True)
        return text_standard
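# Hedged usage sketch (not from the original repo): lex_readability is written as an
# instance method but never touches `self`, so for a quick check it can be called with
# None in place of self. The sample text below is made up for illustration.
import textstat

sample = ("The quick brown fox jumps over the lazy dog. "
          "Readability formulas estimate how hard this sentence is to read.")

print(lex_readability(None, sample, mode='fre'))  # single Flesch reading ease score
print(lex_readability(None, sample, mode='all'))  # tuple: (fre, fog, fkg, dcr, text_standard)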
def check_difficulty(self):
    text = self.textoutput
    # 0-30  = college
    # 50-60 = high school
    # 60+   = middle school/elementary school
    try:
        grade_level = textstat.text_standard(text)
        reading_ease = textstat.flesch_reading_ease(text)  # requires chart
        sentence_count = textstat.sentence_count(text)
        difficult_words = self.get_difficult_words(text)
        replacement_words = self.get_replacement_words(difficult_words)
        output = "Grade Level of Input Text: " + grade_level + "\n"
        # output = output + "Ease of Reading*: " + str(reading_ease) + "\n"
        output = output + "Sentence Count: " + str(sentence_count) + "\n"
        output = output + "Difficult Words Found: " + str(len(difficult_words)) + "\n"
        output = output + "Possible Replacements: " + "\n"
        for dw in replacement_words:
            output = output + dw + " -> "
            for word in replacement_words[dw]:
                output = output + word + ", "
            output = output + "\n"
        self.difficultyReport = output
    except:
        self.difficultyReport = "Error determining Difficulties"
def getReadabilityMetrics(test_data):
    '''
    For a given article IN TEXT FORMAT, returns its readability metrics.
    Uses the textstat library; please install it.
    '''
    metric = {
        "flesch_reading_ease": textstat.flesch_reading_ease(test_data),
        "smog_index": textstat.smog_index(test_data),
        "flesch_kincaid_grade": textstat.flesch_kincaid_grade(test_data),
        "coleman_liau_index": textstat.coleman_liau_index(test_data),
        "automated_readability_index": textstat.automated_readability_index(test_data),
        "dale_chall_readability_score": textstat.dale_chall_readability_score(test_data),
        "difficult_words": textstat.difficult_words(test_data),
        "linsear_write_formula": textstat.linsear_write_formula(test_data),
        "gunning_fog": textstat.gunning_fog(test_data),
        "text_standard": textstat.text_standard(test_data)
    }
    return metric
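# Hedged usage sketch (assumption, not part of the original source): feed a plain-text
# article into getReadabilityMetrics above and print each metric on its own line.
import textstat

article = ("Readability tests score a text by sentence length and word difficulty. "
           "Shorter sentences with common words usually score as easier to read.")

for name, value in getReadabilityMetrics(article).items():
    print(f"{name}: {value}")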
def readability(text):
    """
    Provides the readability grade for the text, using the Flesch reading ease
    score. The higher the score, the easier the text is to read.

    text: input text on which the score has to be calculated
    """
    score = textstat.flesch_reading_ease(text)
    grade = round(textstat.flesch_kincaid_grade(text))
    if score > 90:
        summary = "Very easy to read. Easily understood by an average 11-year-old student."
    elif score > 80:
        summary = "Easy to read. Conversational English for consumers."
    elif score > 70:
        summary = "Fairly easy to read."
    elif score > 60:
        summary = "Plain English. Easily understood by 13- to 15-year-old students."
    elif score > 50:
        summary = "Fairly difficult to read."
    elif score > 30:
        summary = "Difficult to read."
    else:
        summary = "Very difficult to read. Best understood by university graduates."
    return score, summary, grade
def analyze():
    print(request)
    str_to_read = request.data.decode("utf-8").strip()
    report = {
        "flesch-reading-ease": textstat.flesch_reading_ease(str_to_read),
        "smog-index": textstat.smog_index(str_to_read),
        "flesch-kincaid-grade": textstat.flesch_kincaid_grade(str_to_read),
        "coleman-liau-index": textstat.coleman_liau_index(str_to_read),
        "automated-readability-index": textstat.automated_readability_index(str_to_read),
        "dale-chall-readability-score": textstat.dale_chall_readability_score(str_to_read),
        "difficult-words": textstat.difficult_words(str_to_read),
        "linsear-write-formula": textstat.linsear_write_formula(str_to_read),
        "gunning-fog": textstat.gunning_fog(str_to_read),
        "text-standard": textstat.text_standard(str_to_read)
    }
    return decorate_response(jsonify(report))
def seven_test(processed_essay):
    """
    Readability scores assigned to every script on the basis of some
    predefined formulas.

    :param processed_essay:
    :return: flesch_score, gunning_index, kincaid_grade, liau_index,
             automated_readability_index, dale_readability_score,
             difficult_word, linsear_write
    """
    flesch_score = ["FS"]
    gunning_index = ["GI"]
    kincaid_grade = ["KG"]
    liau_index = ["LI"]
    automated_readability_index = ["ARI"]
    dale_readability_score = ["DLS"]
    difficult_word = ["DW"]
    linsear_write = ["LW"]
    for v in processed_essay:
        flesch_score.append(textstat.flesch_reading_ease(str(v)))
        gunning_index.append(textstat.gunning_fog(str(v)))
        kincaid_grade.append(textstat.flesch_kincaid_grade(str(v)))
        liau_index.append(textstat.coleman_liau_index(str(v)))
        automated_readability_index.append(textstat.automated_readability_index(str(v)))
        dale_readability_score.append(textstat.dale_chall_readability_score(str(v)))
        difficult_word.append(textstat.difficult_words(str(v)))
        linsear_write.append(textstat.linsear_write_formula(str(v)))
    return flesch_score, gunning_index, kincaid_grade, liau_index, automated_readability_index, dale_readability_score, difficult_word, linsear_write
def textstat_stats(text):
    doc_length = len(text.split())
    flesch_ease = ts.flesch_reading_ease(text)    # Flesch Reading Ease score
    flesch_grade = ts.flesch_kincaid_grade(text)  # Flesch-Kincaid grade level
    gfog = ts.gunning_fog(text)                   # FOG index, also indicates grade level
    # smog = ts.smog_index(text)                  # SMOG index, also a grade level; only useful on 30+ sentences
    auto_readability = ts.automated_readability_index(text)  # approximates the grade level needed to comprehend the text
    cl_index = ts.coleman_liau_index(text)        # grade level of the text using the Coleman-Liau formula
    lw_formula = ts.linsear_write_formula(text)   # grade level using the Linsear Write formula
    dcr_score = ts.dale_chall_readability_score(text)  # uses a lookup table of the most commonly used 3000 English words
    # text_standard = ts.text_standard(text, float_output=False)  # summary of all the grade-level functions
    syll_count = ts.syllable_count(text, lang='en_US')
    syll_count_scaled = syll_count / doc_length
    lex_count = ts.lexicon_count(text, removepunct=True)
    lex_count_scaled = lex_count / doc_length
    idx = ['flesch_ease', 'flesch_grade', 'gfog',
           'auto_readability', 'cl_index', 'lw_formula',
           'dcr_score',
           # 'text_standard',
           'syll_count', 'lex_count']
    return pd.Series([flesch_ease, flesch_grade, gfog,
                      auto_readability, cl_index, lw_formula,
                      dcr_score,
                      # text_standard,
                      syll_count_scaled, lex_count_scaled],
                     index=idx)
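# Hedged usage sketch (assumption): because textstat_stats returns a pd.Series, it can be
# applied row-wise to a text column to build a readability feature frame. The DataFrame
# and column name below are illustrative; `ts` is assumed to be `import textstat as ts`,
# and a textstat version that still accepts the lang= argument used above is assumed.
import pandas as pd
import textstat as ts

docs = pd.DataFrame({"text": [
    "The cat sat on the mat. It was warm.",
    "Notwithstanding the aforementioned considerations, the committee deliberated extensively."
]})
features = docs["text"].apply(textstat_stats)
print(features.head())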
def get_stats(text):
    fre = textstat.flesch_reading_ease(text)
    smog = textstat.smog_index(text)
    fkg = textstat.flesch_kincaid_grade(text)
    cli = textstat.coleman_liau_index(text)
    ari = textstat.automated_readability_index(text)
    dcr = textstat.dale_chall_readability_score(text)
    diff_words = textstat.difficult_words(text)
    lwf = textstat.linsear_write_formula(text)
    gunn_fog = textstat.gunning_fog(text)
    consolidated_score = textstat.text_standard(text)
    doc_length = len(text)  # think about excluding spaces?
    quote_count = text.count('"')

    stats = {
        "flesch_reading_ease": fre,
        "smog_index": smog,
        "flesch_kincaid_grade": fkg,
        "coleman_liau_index": cli,
        "automated_readability_index": ari,
        "dale_chall_readability_score": dcr,
        "difficult_words": diff_words,
        "linsear_write_formula": lwf,
        "gunning_fog": gunn_fog,
        "consolidated_score": consolidated_score,
        "doc_length": doc_length,
        "quote_count": quote_count
    }
    return stats
def feature_getter(text):
    try:
        text = text.decode('utf-8')
    except:
        pass
    text1 = re.sub(r'[^\x00-\x7F]+', ' ', text)
    ##text1 = re.sub('\n', '. ', text)
    text = text1
    features = []
    tokens = []
    sentences = nltk.sent_tokenize(text)
    for sentence in sentences:
        tokens.extend(nltk.word_tokenize(sentence))
    syllable_count = textstat.syllable_count(text, lang='en_US')
    word_count = textstat.lexicon_count(text, removepunct=True)
    flesch = textstat.flesch_reading_ease(text)
    readability = textstat.automated_readability_index(text)
    features.append(len(sentences))   # num_sentences
    features.append(syllable_count)   # num_syllables
    features.append(word_count)       # num_words
    features.append(flesch)           # Flesch reading ease
    features.append(readability)      # automated readability index
    return features
def score(full):
    st.header(textstat.flesch_reading_ease(full))
    st.write('Flesch Reading Ease Score')
    text = """90-100 Very Easy, 70-79 Fairly Easy, 60-69 Standard, 50-59 Fairly Difficult,
    30-49 Difficult, 0-29 Very Confusing"""
    st.write(text, key=1)

    st.header(textstat.smog_index(full))
    st.write('SMOG Index Score')
    text = "Returns the SMOG index of the given text. This is a grade formula, in that a score of 9.3 means that a ninth " \
           "grader would be able to read the document. Texts of fewer than 30 sentences are statistically invalid, " \
           "because the SMOG formula was normed on 30-sentence samples. textstat requires at least 3 sentences for a " \
           "result."
    st.write(text, key=2)

    st.header(textstat.dale_chall_readability_score(full))
    st.write('Dale-Chall Readability Score')
    text = """Different from other tests, since it uses a lookup table of the most commonly used 3000 English words.
    Thus it returns the grade level using the New Dale-Chall Formula.
    4.9 or lower: average 4th-grade student or lower
    5.0–5.9: average 5th or 6th-grade student
    6.0–6.9: average 7th or 8th-grade student
    7.0–7.9: average 9th or 10th-grade student
    8.0–8.9: average 11th or 12th-grade student
    9.0–9.9: average 13th to 15th-grade (college) student"""
    st.write(text, key=3)
def terms_and_weights(sample):
    sentences = list()
    file_path = f"data/Job Bulletins/{sample}"
    with open(file_path) as file:
        contents = file.read()
        # Score the bulletin text itself, not the path string.
        reading_score = textstat.flesch_reading_ease(contents)
        reading_score_2 = textstat.dale_chall_readability_score(contents)
        for line in contents.splitlines():
            for l in re.split(r"\.\s|\?\s|\!\s|\n", line):
                if l:
                    sentences.append(l)
    cvec = CountVectorizer(stop_words='english', min_df=3, max_df=0.5, ngram_range=(1, 2))
    sf = cvec.fit_transform(sentences)
    transformer = TfidfTransformer()
    transformed_weights = transformer.fit_transform(sf)
    weights = np.asarray(transformed_weights.mean(axis=0)).ravel().tolist()
    weights_df = pd.DataFrame({
        'term': cvec.get_feature_names(),
        'weight': weights
    })
    weights_df = weights_df.sort_values(by='weight', ascending=False).head(10)
    myList = {
        "term": weights_df.term.tolist(),
        "weight": weights_df.weight.tolist(),
        "scores": [reading_score, reading_score_2]
    }
    return jsonify(myList)
def extractFRSAllHTMLFiles():
    """
    Extract Flesch Reading Scores (FRS). The documents and indexes are read
    again because the FRS is computed on the original text, not the processed text.
    """
    Path1 = 'Gutenberg_English_Fiction_1k'
    Path2 = 'Gutenberg_English_Fiction_1k'
    HTMLFilesPath = 'Gutenberg_19th_century_English_Fiction'
    FRSScores = []
    badIndexes = []
    dataPath = os.path.join(os.getcwd(), Path1, Path2, HTMLFilesPath)
    data = pp.readIndexes()
    for i in range(len(data)):
        print(i)
        htmlFilePath = os.path.join(dataPath, data['book_id'][i])[:-5] + '-content.html'
        corpus = pp.readHTMLFile(htmlFilePath)
        if corpus:
            score = textstat.flesch_reading_ease(corpus)
            FRSScores.append(score)
        else:
            badIndexes.append(i)
    with open(FRSFile, 'wb') as f:
        pickle.dump(FRSScores, f)
def fleschscore() -> List:
    """Return the Flesch reading ease score for each policy."""
    flesch_list = []
    for text in policies['Policy']:
        flesch_list.append(textstat.flesch_reading_ease(text))
    return flesch_list
def readability(queries):
    scores = {
        'Flesch': [],
        'Smog': [],
        'Flesch grade': [],
        'Coleman': [],
        'Automated': [],
        'Dale': [],
        'Difficult': [],
        'Linsear': [],
        'Gunning': [],
        'Text Standard': []
    }
    for line in queries:
        # results = readability.getmeasures(line, lang='en')
        # frescores.append(results['readability grades']['FleschReadingEase'])
        scores['Flesch'].append(textstat.flesch_reading_ease(line))
        scores['Smog'].append(textstat.smog_index(line))
        scores['Flesch grade'].append(textstat.flesch_kincaid_grade(line))
        scores['Coleman'].append(textstat.coleman_liau_index(line))
        scores['Automated'].append(textstat.automated_readability_index(line))
        scores['Dale'].append(textstat.dale_chall_readability_score(line))
        scores['Difficult'].append(textstat.difficult_words(line))
        scores['Linsear'].append(textstat.linsear_write_formula(line))
        scores['Gunning'].append(textstat.gunning_fog(line))
        scores['Text Standard'].append(textstat.text_standard(line, float_output=True))
    return scores
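# Hedged usage sketch (assumption): the dict of lists returned by readability() maps
# directly onto a DataFrame, one row per query, which makes it easy to summarise.
# The sample queries below are made up.
import pandas as pd
import textstat

queries = [
    "Where can I find the nearest train station?",
    "Enumerate the statutory prerequisites for incorporation in this jurisdiction."
]
scores_df = pd.DataFrame(readability(queries))
print(scores_df)
print(scores_df.mean(numeric_only=True))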
def get_readibility(text, metric="flesch_kincaid_grade"):
    """
    Return a score which reveals a piece of text's readability level.

    Reference:
    https://chartbeat-labs.github.io/textacy/getting_started/quickstart.html
    https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests
    """
    if metric == "flesch_kincaid_grade":
        result = textstat.flesch_kincaid_grade(text)
    elif metric == "flesch_reading_ease":
        result = textstat.flesch_reading_ease(text)
    elif metric == "smog_index":
        result = textstat.smog_index(text)
    elif metric == "coleman_liau_index":
        result = textstat.coleman_liau_index(text)
    elif metric == "automated_readability_index":
        result = textstat.automated_readability_index(text)
    elif metric == "dale_chall_readability_score":
        result = textstat.dale_chall_readability_score(text)
    elif metric == "difficult_words":
        result = textstat.difficult_words(text)
    elif metric == "linsear_write_formula":
        result = textstat.linsear_write_formula(text)
    elif metric == "gunning_fog":
        result = textstat.gunning_fog(text)
    elif metric == "text_standard":
        result = textstat.text_standard(text)
    else:
        print("ERROR: Please select correct metric!")
        result = None
    return result
def test_changing_lang_clears_cache():
    textstat.set_lang("en_US")

    # Clear any cache and call reading ease
    textstat.flesch_reading_ease.cache_clear()
    textstat.flesch_reading_ease(short_test)

    # Check the cache has only been missed once
    assert textstat.flesch_reading_ease.cache_info().misses == 1

    # Change the language and recall reading ease
    textstat.set_lang("fr")
    textstat.flesch_reading_ease(short_test)

    # Check the cache hasn't been hit again
    assert textstat.flesch_reading_ease.cache_info().misses == 1
def addFleschReadingScore(self):
    """Compute a Flesch reading ease score for each row of the content column."""
    score = []
    for each in self.data['content'].items():
        each = self.removeHTMLTags(each[1])
        score.append(textstat.flesch_reading_ease(each))
    self.data['Flesch_Score'] = score
def get_readability_score(text, metric="flesch"):
    global tknzr, DIFFICULT
    text = text.replace("’", "'")
    # https://pypi.org/project/textstat/
    if metric == "flesch":
        return textstat.flesch_reading_ease(text)
    elif metric == "smog":
        return textstat.smog_index(text)
    elif metric == "coleman_liau_index":
        return textstat.coleman_liau_index(text)
    elif metric == "automated_readability_index":
        return textstat.automated_readability_index(text)
    elif metric == "dale_chall_readability_score":
        return textstat.dale_chall_readability_score(text)
    elif metric == "difficult_words":
        nb_difficult = 0
        nb_easy = 0
        for w in set(tknzr.tokenize(text.lower())):
            if w not in EASY_WORDS and len(w) >= 6:
                nb_difficult += 1
            else:
                nb_easy += 1
        return 100 * nb_difficult / (nb_difficult + nb_easy)
        # return textstat.difficult_words(text)  # /len(text.split())
    elif metric == "linsear_write_formula":
        return textstat.linsear_write_formula(text)
    elif metric == "gunning_fog":
        return textstat.gunning_fog(text)
    elif metric == "avg_word_length":
        words = tknzr.tokenize(text)
        words = [w for w in words if w not in misc_utils.PUNCT]
        if len(words) == 0:
            return 0
        return np.average([len(w) for w in words])
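# Hedged setup sketch (assumptions throughout): get_readability_score above relies on
# module-level globals. Here tknzr is assumed to be an NLTK TweetTokenizer and EASY_WORDS
# a set of common words (the real project likely uses a much larger easy-word list such
# as Dale-Chall); only metrics that do not need misc_utils are exercised.
from nltk.tokenize import TweetTokenizer
import textstat

tknzr = TweetTokenizer()
EASY_WORDS = {"the", "cat", "sat", "on", "mat", "it", "was", "very"}

sample = "The cat sat on the mat. It was extraordinarily comfortable."
print(get_readability_score(sample, metric="flesch"))
print(get_readability_score(sample, metric="difficult_words"))  # % of long, uncommon words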
def get_under_score_text(self, score):
    """Return the sentences whose readability falls below `score`;
    a very low score means complex words are being used in those sentences."""
    reading = self.content.apply(lambda x: flesch_reading_ease(x))
    x = [i for i in range(len(reading)) if reading[i] < score]
    # displacy(self.content.iloc[x].head())
    return self.content.iloc[x]
def main():
    df = pd.DataFrame(columns=['Utility', 'FK Score', 'FK Grade Level'])
    for x in glob.glob('pdfs/*.pdf'):
        try:
            text = parser.from_file(x)
            df = df.append(
                {
                    'Utility': str(x).split('\\')[1].split('.')[0],
                    'FK Score': textstat.flesch_reading_ease(text['content']),
                    'FK Grade Level': textstat.flesch_kincaid_grade(text['content'])
                },
                ignore_index=True)
        except:
            df = df.append(
                {
                    'Utility': str(x).split('\\')[1].split('.')[0],
                    'FK Score': 'N/A',
                    'FK Grade Level': 'N/A'
                },
                ignore_index=True)
    df.to_csv('data/results/readability_results.csv', encoding='utf-8')
def complexityFunction(text, level):
    words = text.split()
    selectedWords = []
    for i in words:
        if textstat.flesch_reading_ease(i) <= level:
            selectedWords.append(i)
    return selectedWords
def sentence_by_sentence_analysis(cleanedtext):
    blob = TextBlob(cleanedtext)
    split_text = blob.sentences
    df = pd.DataFrame((''.join(split_text[i]) for i in range(len(split_text))), columns=['Sentences'])
    df["Sentence Word Count"] = pd.DataFrame(len(df["Sentences"][i].split()) for i in range(len(df)))
    df["FS_GradeScore"] = pd.DataFrame((textstat.flesch_reading_ease(df["Sentences"][i]) for i in range(len(df))))
    df[["TextBlob_Polarity", "TextBlob_Subjectivity"]] = round(pd.DataFrame((split_text[i].sentiment for i in range(len(split_text)))) * 100, 1)
    return df
def score(text):
    a = textstat.flesch_reading_ease(text)
    b = textstat.flesch_kincaid_grade(text)
    c = textstat.gunning_fog(text)
    d = textstat.smog_index(text)
    e = textstat.coleman_liau_index(text)
    f = textstat.automated_readability_index(text)
    return a, b, c, d, e, f
def test_flesch_reading_ease():
    textstat.set_lang("en_US")
    score = textstat.flesch_reading_ease(long_test)
    assert score == 64.75

    textstat.set_lang("de_DE")
    score = textstat.flesch_reading_ease(long_test)
    assert score == 63.1

    textstat.set_lang("es_ES")
    score = textstat.flesch_reading_ease(long_test)
    assert score == 84.37

    textstat.set_lang("fr_FR")
    score = textstat.flesch_reading_ease(long_test)
    assert score == 80.31

    textstat.set_lang("it_IT")
    score = textstat.flesch_reading_ease(long_test)
    assert score == 89.27

    textstat.set_lang("nl_NL")
    score = textstat.flesch_reading_ease(long_test)
    assert score == 61.97

    textstat.set_lang("ru_RU")
    score = textstat.flesch_reading_ease(long_test)
    assert score == 116.45
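# Hedged sketch (assumption, not part of the textstat test suite): as the test above shows,
# flesch_reading_ease depends on the language set via textstat.set_lang, so scoring texts
# in several languages means switching the language per call and restoring it afterwards.
import textstat

def flesch_for(text, lang="en_US"):
    # Score `text` under the given language, then restore English so later
    # calls keep the default behaviour.
    textstat.set_lang(lang)
    try:
        return textstat.flesch_reading_ease(text)
    finally:
        textstat.set_lang("en_US")

print(flesch_for("The cat sat on the mat.", "en_US"))
print(flesch_for("Le chat est assis sur le tapis.", "fr_FR"))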
def reading_level():
    if request.method != 'POST':
        print('Not a post request')
        return 'NOTAVALIDPATH:'
    sentence = request.get_json()['sentence']
    level = textstat.flesch_reading_ease(sentence)
    return json.dumps({'level': level})
def reducer(self, user, reviews):
    OUTPUT_PROTOCOL = CsvProtocol
    reviews = ' '.join(reviews)
    metric = textstat.flesch_reading_ease(reviews)
    writed = user + ', ' + str(metric) + '\n'
    data.write(writed)
    yield user, metric
def get_readability(text):
    # Takes in a string sentence and returns a numeric rating.
    # print(text, '\n')
    # flesch_reading_ease returns a score from 120 down to negative infinity;
    # higher scores are easier to read.
    rating = textstat.flesch_reading_ease(text)
    # print(rating, '\n')
    # text_standard returns the grade level needed to comprehend the text,
    # consolidating several different readability methods.
    # print(textstat.text_standard(text, float_output=False), '\n \n')
    return rating
def extractor(path_to_file):
    book_data = html_parser.extract_text(path_to_file)
    blob = TextBlob(book_data)
    sentiment_start, sentiment_end = start_end_sentiment(blob)
    sentences_count, avg_sentence_len, word_count, proper_noun_count = book_structure(blob, path_to_file)
    flesch_score = textstat.flesch_reading_ease(book_data)
    return sentiment_start, sentiment_end, sentences_count, avg_sentence_len, flesch_score, word_count, proper_noun_count
def do_datas():
    # logging.info('do_datas')

    # Save text statistics:
    # 1. nw  2. nvocab  3. nsyllable  4. nsentence  5. tone  6. readability

    ## 1. nw
    nw.append(len(words))

    ## 2. nvocab
    nvocab.append(len(vocab))

    ## 3. nsyllable
    n = textstat.syllable_count(contents)
    nsyllable.append(n)

    ## 4. nsentence
    n = textstat.sentence_count(contents)
    nsentence.append(n)

    ## 5. tone
    ### LM dictionary
    n_neg_lm.append(count_occurrence(words, lm_neg))
    n_pos_lm.append(count_occurrence(words, lm_pos))
    n_uctt_lm.append(count_occurrence(words, lm_uctt))
    n_lit_lm.append(count_occurrence(words, lm_lit))
    n_cstr_lm.append(count_occurrence(words, lm_cstr))
    n_modal1_lm.append(count_occurrence(words, lm_modal1))
    n_modal2_lm.append(count_occurrence(words, lm_modal2))
    n_modal3_lm.append(count_occurrence(words, lm_modal3))
    n_negation_lm.append(count_negation(words, lm_pos, gt_negation))

    ### General Inquirer dictionary
    n_neg_gi.append(count_occurrence(words, gi_neg))
    n_pos_gi.append(count_occurrence(words, gi_pos))
    n_negation_gi.append(count_negation(words, gi_pos, gt_negation))

    ### Henry dictionary
    n_neg_hr.append(count_occurrence(words, hr_neg))
    n_pos_hr.append(count_occurrence(words, hr_pos))
    n_negation_hr.append(count_negation(words, gi_pos, gt_negation))

    ## 6. readability
    fre_i = textstat.flesch_reading_ease(contents)
    if fre_i > 100:
        fre_i = 100
    if fre_i < 0:
        fre_i = float('NaN')
    fre.append(fre_i)

    fkg_i = textstat.flesch_kincaid_grade(contents)
    if fkg_i < 0:
        fkg_i = float('NaN')
    fkg.append(fkg_i)

    # RIX
    cl_i = textstat.coleman_liau_index(contents)
    if cl_i < 0:
        cl_i = float('NaN')
    cl.append(cl_i)

    f = textstat.gunning_fog(contents)
    fog.append(f)

    f = textstat.automated_readability_index(contents)
    ari.append(f)

    f = textstat.smog_index(contents)
    smog.append(f)
def doc_calc(self, article):
    """Compute readability, profanity, and sentiment features for an article."""
    flesch_ease = textstat.flesch_reading_ease(article)
    flesch_grade = textstat.flesch_kincaid_grade(article)
    gunning = textstat.gunning_fog(article)
    profanity = predict_prob([article])[0]
    polarity = TextBlob(article).sentiment.polarity
    return pd.Series([flesch_ease, flesch_grade, gunning, profanity, polarity])
def test_flesch_reading_ease():
    score = textstat.flesch_reading_ease(long_test)
    assert score == 64.75
import openpyxl
import textstat

wb = openpyxl.load_workbook('testing.xlsx')
ws = wb.get_sheet_by_name('testing_set')

for i in range(2, 591):
    f_essay = ws.cell(row=i, column=3)
    essay = f_essay.value
    score = textstat.flesch_reading_ease(essay)
    ws.cell(row=i, column=11).value = score

wb.save('testing.xlsx')
def getReadabilityScore(title):
    print(textstat.flesch_reading_ease(title[0]))