import statistics as stat
from typing import Optional

from readability import Readability
from readability.exceptions import ReadabilityException


def __ari(r: Readability) -> Optional[float]:
    # Collapse ARI grade levels to one number; None signals "not computable".
    try:
        lvls = r.ari().grade_levels
        if 'college_graduate' in lvls:
            return 17
        elif 'college' in lvls:
            return 13
        elif 'K' in lvls:
            return 0
        else:
            return stat.mean([float(lvl) for lvl in lvls])
    except ReadabilityException:
        return None
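
A minimal usage sketch (the sample text is made up; py-readability-metrics refuses texts under 100 words, so a short sentence is repeated):

sample = "The quick brown fox jumps over the lazy dog. " * 25  # ~225 words
grade = __ari(Readability(sample))
print(grade)  # a numeric grade estimate, or None if scoring failed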
Example #2
import numpy as np
from allennlp.data.fields import ArrayField  # assumed source of ArrayField
from readability import Readability
from readability.exceptions import ReadabilityException


def doc_to_readability(doc_str) -> ArrayField:
    if len(doc_str) < 10:
        return ArrayField(np.zeros(7))
    str_to_read = doc_str
    try:
        # py-readability-metrics needs at least 100 words; pad short
        # documents by repeating them until they clear the threshold.
        while len(str_to_read.split()) < 150:
            str_to_read += " " + doc_str
        r = Readability(str_to_read)
        r_scores = [
            r.flesch_kincaid().score,
            r.flesch().score,
            r.gunning_fog().score,
            r.coleman_liau().score,
            r.dale_chall().score,
            r.ari().score,
            r.linsear_write().score
        ]
        return ArrayField(np.array(r_scores))
    except ReadabilityException:
        return ArrayField(np.zeros(7))
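
A quick sanity check of the fallback path (assumes AllenNLP's ArrayField, whose .array attribute holds the wrapped numpy vector):

print(doc_to_readability("too short").array)  # -> seven zeros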
Example #3

import pandas as pd
import streamlit as st
from readability import Readability
from readability.exceptions import ReadabilityException

#------------------
# Readability
#------------------

st.header('Readability')

# Context 
passage = st.text_area("Candidate Bible Passage (English)", value='', 
        max_chars=None, key='readability_passage')

# Calculate and display readability on demand. Readability raises a
# ReadabilityException for passages under 100 words, so the scores are
# computed only after the button press and errors are shown to the user
# instead of crashing the page on an empty text area.
if st.button('Assess Readability', key=None):
    try:
        r = Readability(passage)
        data = [
                ['Flesch-Kincaid Score', r.flesch_kincaid().score],
                ['Flesch Reading Ease', r.flesch().ease],
                ['Dale Chall Readability Score', r.dale_chall().score],
                ['Automated Readability Index Score', r.ari().score],
                ['Coleman Liau Index', r.coleman_liau().score],
                ['Gunning Fog', r.gunning_fog().score],
                ['Linsear Write', r.linsear_write().score],
                ['Spache Readability Formula', r.spache().score]
                ]
        df = pd.DataFrame(data, columns=['Readability Metric', 'Value'])
        st.write(df)
    except ReadabilityException as e:
        st.error(str(e))

Example #4
import unittest

from readability import Readability


class ReadabilityTest(unittest.TestCase):
    def setUp(self):
        text = """
        In linguistics, the Gunning fog index is a readability test for English writing. The index estimates the years of formal education a person needs to understand the text on the first reading. For instance, a fog index of 12 requires the reading level of a United States high school senior (around 18 years old). The test was developed in 1952 by Robert Gunning, an American businessman who had been involved in newspaper and textbook publishing.
        The fog index is commonly used to confirm that text can be read easily by the intended audience. Texts for a wide audience generally need a fog index less than 12. Texts requiring near-universal understanding generally need an index less than 8.
        """
        self.readability = Readability(text)

    def test_ari(self):
        r = self.readability.ari()
        print(r)
        self.assertEqual(9.551245421245422, r.score)
        self.assertEqual(['10'], r.grade_levels)
        self.assertEqual([15, 16], r.ages)

    def test_coleman_liau(self):
        r = self.readability.coleman_liau()
        print(r)
        self.assertEqual(10.673162393162393, r.score)
        self.assertEqual('11', r.grade_level)

    def test_dale_chall(self):
        r = self.readability.dale_chall()
        print(r)
        self.assertEqual(9.32399010989011, r.score)
        self.assertEqual(['college'], r.grade_levels)

    def test_flesch(self):
        r = self.readability.flesch()
        print(r)
        self.assertEqual(51.039230769230784, r.score)
        self.assertEqual(['10', '11', '12'], r.grade_levels)
        self.assertEqual('fairly_difficult', r.ease)

    def test_flesch_kincaid(self):
        r = self.readability.flesch_kincaid()
        print(r)
        self.assertEqual(10.125531135531137, r.score)
        self.assertEqual('10', r.grade_level)

    def test_gunning_fog(self):
        r = self.readability.gunning_fog()
        print(r)
        self.assertEqual(12.4976800976801, r.score)
        self.assertEqual('12', r.grade_level)

    def test_linsear_write(self):
        r = self.readability.linsear_write()
        print(r)
        self.assertEqual(11.214285714285714, r.score)
        self.assertEqual('11', r.grade_level)

    def test_smog(self):
        text = """
        In linguistics, the Gunning fog index is a readability test for English writing. The index estimates the years of formal education a person needs to understand the text on the first reading. For instance, a fog index of 12 requires the reading level of a United States high school senior (around 18 years old). The test was developed in 1952 by Robert Gunning, an American businessman who had been involved in newspaper and textbook publishing.
        The fog index is commonly used to confirm that text can be read easily by the intended audience. Texts for a wide audience generally need a fog index less than 12. Texts requiring near-universal understanding generally need an index less than 8.
        """
        text = ' '.join(text for i in range(0, 5))

        readability = Readability(text)
        r = readability.smog()

        print(r)
        self.assertEqual(12.516099999999998, r.score)
        self.assertEqual('13', r.grade_level)

    def test_spache(self):
        r = self.readability.spache()
        print(r)
        self.assertEqual(7.164945054945054, r.score)
        self.assertEqual('7', r.grade_level)

    def test_print_stats(self):
        stats = self.readability.statistics()
        self.assertEqual(562, stats['num_letters'])
        self.assertEqual(117, stats['num_words'])
        self.assertEqual(7, stats['num_sentences'])
        self.assertEqual(20, stats['num_polysyllabic_words'])
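
A conventional entry point so the suite can be run directly (the file name in the comment is illustrative):

# Run with: python test_readability.py
if __name__ == '__main__':
    unittest.main()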
Example #5
# Requires nltk, statistics, string and the surrounding project's logger,
# default_stopwords and pushshift helper functions.
def fetch_Data_pushshift(Search_User, Search_Subs_List):
    Fetch_Data = {}
    _c_comment_lengths = {}
    _c_comment_texts = {}
    logger.debug("fetch_Data_pushshift user=%s" % Search_User)

    c_count = 0
    comments = get_author_comments_pushshift(author=Search_User,size=1000,sort='desc',sort_type='created_utc')
    for comment in comments:
        commentsub=comment['subreddit'].lower()
        if commentsub in Search_Subs_List:
            if commentsub not in Fetch_Data:
                Fetch_Data[commentsub] = {}
                Fetch_Data[commentsub]['c_karma'] = 0
                Fetch_Data[commentsub]['c_count'] = 0
                Fetch_Data[commentsub]['s_karma'] = 0
                Fetch_Data[commentsub]['s_count'] = 0
                _c_comment_texts[commentsub] = ""
                _c_comment_lengths[commentsub] = []

            Fetch_Data[commentsub]['c_karma'] += comment['score']
            Fetch_Data[commentsub]['c_count'] += 1
            _c_comment_texts[commentsub] += comment['body']
            _c_length = len(comment['body'].split())
            _c_comment_lengths[commentsub].append(_c_length)

    s_count = 0
    submissions = get_author_submissions_pushshift(author=Search_User,size=1000,sort='desc',sort_type='created_utc')
    for submit in submissions:
        if 'subreddit' in submit:
            submitsub=submit['subreddit'].lower()
            if submitsub in Search_Subs_List:
                if submitsub not in Fetch_Data:
                    Fetch_Data[submitsub] = {}
                    Fetch_Data[submitsub]['c_karma'] = 0
                    Fetch_Data[submitsub]['c_count'] = 0
                    Fetch_Data[submitsub]['s_karma'] = 0
                    Fetch_Data[submitsub]['s_count'] = 0
                Fetch_Data[submitsub]['s_karma'] += submit['score']
                Fetch_Data[submitsub]['s_count'] += 1

    # Process comment data
    for sreddit in Fetch_Data:
        if sreddit in _c_comment_texts:
            words = nltk.word_tokenize(_c_comment_texts[sreddit])
            words = [ word for word in words if len(word) > 3]
            words = [ word.lower() for word in words ]
            words = [ word for word in words if word not in default_stopwords ]
            words = [ word for word in words if word not in string.punctuation ]
        else:
            words = nltk.word_tokenize('')
        fdist = nltk.FreqDist(words)
        wordlist = []
        for topword, frequency in fdist.most_common(3):
            wordlist.append(topword)
        topwords = ', '.join(wordlist)
        Fetch_Data[sreddit]['top_words'] = topwords
        if sreddit in _c_comment_lengths:
            Fetch_Data[sreddit]['c_median_length'] = statistics.median(_c_comment_lengths[sreddit])
            if len(words) > 100:
                r = Readability(_c_comment_texts[sreddit])
                Fetch_Data[sreddit]['grade_level'] = r.ari().grade_levels[0]
            else:
                Fetch_Data[sreddit]['grade_level'] = ''
        else:
            Fetch_Data[sreddit]['c_median_length'] = 0
            Fetch_Data[sreddit]['grade_level'] = ''


    # mark other subs searched as empty
    for sreddit in Search_Subs_List:
        if sreddit not in Fetch_Data:
            Fetch_Data[sreddit] = {}
            Fetch_Data[sreddit]['c_karma'] = 0
            Fetch_Data[sreddit]['c_count'] = 0
            Fetch_Data[sreddit]['s_karma'] = 0
            Fetch_Data[sreddit]['s_count'] = 0
            Fetch_Data[sreddit]['c_median_length'] = 0
            Fetch_Data[sreddit]['top_words'] = ''
            Fetch_Data[sreddit]['grade_level'] = ''

    return Fetch_Data
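
A hypothetical call, assuming the pushshift helpers are in scope (the user name and subreddit list are made up; every searched sub is guaranteed a record by the mark-empty loop above):

data = fetch_Data_pushshift('some_user', ['python', 'askscience'])
print(data['python']['c_count'], data['python']['top_words'])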
Example #6
# Requires praw, nltk, statistics, string, a profanity checker exposing
# contains_profanity, and the surrounding project's logger/default_stopwords.
def fetch_Data_reddit(reddit, Search_User, Search_Subs_List, Expiration=14):
    Fetch_Data = {}
    _c_comment_lengths = {}
    _c_comment_texts = {}
    c_count = 0

    logger.debug("fetch_Data_reddit user=%s" % Search_User)
    for comment in reddit.redditor(Search_User).comments.new(limit=1000):
        commentsub=comment.subreddit.display_name.lower()
        if commentsub in Search_Subs_List:
            if commentsub not in Fetch_Data:
                Fetch_Data[commentsub] = {}
                Fetch_Data[commentsub]['c_karma'] = 0
                Fetch_Data[commentsub]['c_count'] = 0
                Fetch_Data[commentsub]['s_karma'] = 0
                Fetch_Data[commentsub]['s_count'] = 0
                Fetch_Data[commentsub]['p_count'] = 0
                Fetch_Data[commentsub]['p_pct'] = ''
                _c_comment_texts[commentsub] = ""
                _c_comment_lengths[commentsub] = []

            Fetch_Data[commentsub]['c_karma'] += comment.score
            Fetch_Data[commentsub]['c_count'] += 1
            _c_comment_texts[commentsub] += comment.body
            _c_length = len(comment.body.split())
            _c_comment_lengths[commentsub].append(_c_length)
            if profanity.contains_profanity(comment.body):
               Fetch_Data[commentsub]['p_count'] += 1
                  
    s_count = 0
    for submit in reddit.redditor(Search_User).submissions.new(limit=1000):
        submitsub = submit.subreddit.display_name.lower()
        if submitsub in Search_Subs_List:
            if submitsub not in Fetch_Data:
                Fetch_Data[submitsub] = {}
                Fetch_Data[submitsub]['c_karma'] = 0
                Fetch_Data[submitsub]['c_count'] = 0
                Fetch_Data[submitsub]['s_karma'] = 0
                Fetch_Data[submitsub]['s_count'] = 0
                Fetch_Data[submitsub]['p_count'] = 0
                Fetch_Data[submitsub]['p_pct'] = ''
            Fetch_Data[submitsub]['s_karma'] += submit.score
            Fetch_Data[submitsub]['s_count'] += 1

    # Process comment data
    for sreddit in Fetch_Data:
        if sreddit in _c_comment_texts:
            words = nltk.word_tokenize(_c_comment_texts[sreddit])
            words = [ word for word in words if len(word) > 3]
            words = [ word.lower() for word in words ]
            words = [ word for word in words if word not in default_stopwords ]
            words = [ word for word in words if word not in string.punctuation ]
        else:
            words = nltk.word_tokenize('')
        fdist = nltk.FreqDist(words)
        wordlist = []
        for topword, frequency in fdist.most_common(3):
            wordlist.append(topword)
        topwords = ', '.join(wordlist)
        Fetch_Data[sreddit]['top_words'] = topwords 
        if sreddit in _c_comment_lengths:
            Fetch_Data[sreddit]['c_median_length'] = statistics.median(_c_comment_lengths[sreddit])
            if len(words) > 100:
                r = Readability(_c_comment_texts[sreddit])
                Fetch_Data[sreddit]['grade_level'] = r.ari().grade_levels[0]
            else:
                Fetch_Data[sreddit]['grade_level'] = ''
        else:
            Fetch_Data[sreddit]['c_median_length'] = 0
            Fetch_Data[sreddit]['grade_level'] = ''

        if Fetch_Data[sreddit]['p_count'] > 0 and Fetch_Data[sreddit]['c_count'] > 0:
            p_percent = Fetch_Data[sreddit]['p_count'] / Fetch_Data[sreddit]['c_count'] * 100
            Fetch_Data[sreddit]['p_pct'] = '{0:.1f}%'.format(p_percent)
        else:
            Fetch_Data[sreddit]['p_pct'] = ''

    # mark other subs searched as empty
    for sreddit in Search_Subs_List:
        if sreddit not in Fetch_Data:
            Fetch_Data[sreddit] = {}
            Fetch_Data[sreddit]['c_karma'] = 0
            Fetch_Data[sreddit]['c_count'] = 0
            Fetch_Data[sreddit]['s_karma'] = 0
            Fetch_Data[sreddit]['s_count'] = 0
            Fetch_Data[sreddit]['p_count'] = 0
            Fetch_Data[sreddit]['p_pct'] = ''
            Fetch_Data[sreddit]['c_median_length'] = 0
            Fetch_Data[sreddit]['top_words'] = ''
            Fetch_Data[sreddit]['grade_level'] = ''

    logger.debug("FETCH: %s" % Fetch_Data)
    return Fetch_Data
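
Hypothetical usage; the credentials are placeholders and a configured PRAW client is required:

import praw

reddit = praw.Reddit(client_id='...', client_secret='...', user_agent='demo')
print(fetch_Data_reddit(reddit, 'some_user', ['python']))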
Example #7
import sys

# TextUtility, LexicalDiversity, WordProbability, TextSearch and this older
# Readability API (kincaid(), colemanLiau(), fog(), ...) are provided by the
# surrounding project.
def main():
    # Read and normalise input text
    content = sys.stdin.read()
    tu = TextUtility(content)
    cleanText = tu.normalizeText()

    #removedInfrequent = tu.tokenizeAndRemoveCommonWords(4)
    #print("Without frequent words: ", removedInfrequent)

    #Readability measures
    print("\n*** Readability ***")
    readability = Readability(content)
    Kincaid = readability.kincaid()
    sys.stdout.write('Kincaid (school grade level): %f\n' % (Kincaid))

    ARI_score = readability.ari()
    sys.stdout.write('ARI (school grade level): %f\n' % (ARI_score))

    ColemanLiau = readability.colemanLiau()
    sys.stdout.write('ColemanLiau (school grade level): %f\n' % (ColemanLiau))

    Flesch = readability.flesch()
    sys.stdout.write('Flesch reading ease (0=very difficult, 100=very easy):'
                     ' %f\n' % (Flesch))

    Fog = readability.fog()
    sys.stdout.write('Fog: grade level: %f\n' % (Fog))

    SMOG = readability.smog()
    sys.stdout.write('SMOG: years of education needed to comprehend: %f\n' %
                     (SMOG))

    print("\n*** Lexical Diversity ***")
    diversity = LexicalDiversity(content)
    yulei = diversity.yulei()
    sys.stdout.write('Yule I Lexical Diversity: %f\n' % yulei)

    word_entropy = diversity.calcWordEntropy()
    sys.stdout.write('Word entropy: %f \n' % (word_entropy))

    synonym_suggestions = diversity.recommendSynonyms(90, 3)
    sys.stdout.write(
        '\nHere are some synonyms for frequently occurring words:\n')
    for word in synonym_suggestions.keys():
        print(word, synonym_suggestions[word])

    print("\n*** Description ***")
    word_probability = WordProbability(content)
    hashtags = word_probability.hashtagSuggestions(99)
    sys.stdout.write('Hashtag suggestions %s\n' % hashtags)

    tfidf = word_probability.tfidf(5)
    print("tfidf: ", tfidf)

    entrSyl = diversity.entropySyllable(5)
    entrSylWords = [entry[0] for entry in entrSyl]
    print("sylEntropy: ", entrSylWords)

    summary = word_probability.summary(55)
    sys.stdout.write('\nSummary %s\n' % summary)

    print("\n*** Search/Plagarism ***")
    plagarism = TextSearch()
    txt = content
    pat = "empowers consumers to take control"
    setPat = {"empowers consumers", "a million bank acc", "Contact UsPrivacy "}

    plg = plagiarism.rabinKarp(setPat, txt)
    sys.stdout.write('Rabin-Karp found pattern at %s \n' % str(plg))

    #wildSearch = 'tsfd;lkasdi*ghasd'
    wildSearch = 'the*est'
    #empowers consumers to take control of their financial
    #wildSearch = 'empowers c*to take*of their financial'
    #wildSearch = 'consumers*take control of their'
    wild = plagiarism.wildCardSearch(wildSearch, txt)
    sys.stdout.write('Wildcard search found %s \n' % str(wild))

    print("\n*** Cliche Detection ***")
    plagiarism.findCliches(content)
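
A conventional entry point; the script reads its input from stdin, e.g. "cat article.txt | python analyse.py" (the file names are illustrative):

if __name__ == '__main__':
    main()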
Example #8
import unittest

from readability import Readability
from readability.exceptions import ReadabilityException


class ReadabilityTest(unittest.TestCase):
    def setUp(self):
        text = """
        “On a June day sometime in the early 1990s, encouraged by his friend and fellow economist Jörgen Weibull, Abhijit went swimming in the Baltic. He leaped in and instantly jumped out—he claims that his teeth continued to chatter for the next three days. In 2018, also in June, we went to the Baltic in Stockholm, several hundred miles farther north than the previous encounter. This time it was literally child’s play; our children frolicked in the water.
        Wherever we went in Sweden, the unusually warm weather was a topic of conversation. It was probably a portent of something everyone felt, but for the moment it was hard not to be quite delighted with the new opportunities for outdoor life it offered.”. 
        """
        self.readability = Readability(text)

    def test_ari(self):
        r = self.readability.ari()
        print(r)
        self.assertEqual(9.551245421245422, r.score)
        self.assertEqual(['10'], r.grade_levels)
        self.assertEqual([15, 16], r.ages)

    def test_coleman_liau(self):
        r = self.readability.coleman_liau()
        print(r)
        self.assertEqual(10.673162393162393, r.score)
        self.assertEqual('11', r.grade_level)

    def test_dale_chall(self):
        r = self.readability.dale_chall()
        print(r)
        self.assertEqual(9.32399010989011, r.score)
        self.assertEqual(['college'], r.grade_levels)

    def test_flesch(self):
        r = self.readability.flesch()
        print(r)
        self.assertEqual(51.039230769230784, r.score)
        self.assertEqual(['10', '11', '12'], r.grade_levels)
        self.assertEqual('fairly_difficult', r.ease)

    def test_flesch_kincaid(self):
        r = self.readability.flesch_kincaid()
        print(r)
        self.assertEqual(10.125531135531137, r.score)
        self.assertEqual('10', r.grade_level)

    def test_gunning_fog(self):
        r = self.readability.gunning_fog()
        print(r)
        self.assertEqual(12.4976800976801, r.score)
        self.assertEqual('12', r.grade_level)

    def test_linsear_write(self):
        r = self.readability.linsear_write()
        print(r)
        self.assertEqual(11.214285714285714, r.score)
        self.assertEqual('11', r.grade_level)

    def test_smog(self):
        text = """
        “On a June day sometime in the early 1990s, encouraged by his friend and fellow economist Jörgen Weibull, Abhijit went swimming in the Baltic. He leaped in and instantly jumped out—he claims that his teeth continued to chatter for the next three days. In 2018, also in June, we went to the Baltic in Stockholm, several hundred miles farther north than the previous encounter. This time it was literally child’s play; our children frolicked in the water.
        Wherever we went in Sweden, the unusually warm weather was a topic of conversation. It was probably a portent of something everyone felt, but for the moment it was hard not to be quite delighted with the new opportunities for outdoor life it offered.”. 
        """
        text = ' '.join(text for i in range(0, 5))

        readability = Readability(text)

        #Test SMOG with 30 sentences
        r1 = readability.smog()

        #Test SMOG with all sentences
        r2 = readability.smog(all_sentences=True)

        print("all_sentences=False: %s ; all_sentences=True: %s" % (r1, r2))
        self.assertEqual(12.516099999999998, r1.score)
        self.assertEqual('13', r1.grade_level)

        self.assertEqual(12.785403640627713, r2.score)
        self.assertEqual('13', r2.grade_level)

    def test_spache(self):
        r = self.readability.spache()
        print(r)
        self.assertEqual(7.164945054945054, r.score)
        self.assertEqual('7', r.grade_level)

    def test_print_stats(self):
        stats = self.readability.statistics()
        self.assertEqual(562, stats['num_letters'])
        self.assertEqual(117, stats['num_words'])
        self.assertEqual(7, stats['num_sentences'])
        self.assertEqual(20, stats['num_polysyllabic_words'])


class ReadabilityAnalyser:
    def __init__(self, text):
        self.readability = Readability(text)
        self.FLESCH_KINCAID = ['score', 'grade_level']
        self.FLESCH_EASE = ['score', 'ease', 'grade_level']
        self.DALE_CHALL = ['score', 'grade_level']
        self.ARI = ['score', 'grade_level', 'ages']
        self.CLI = ['score', 'grade_level']
        self.GUNNING_FOG = ['score', 'grade_level']
        self.SMOG = ['score', 'grade_level']
        self.SPACHE = ['score', 'grade_level']
        self.LINSEAR_WRITE = ['score', 'grade_level']
        self.values_index = self.initialize_value_index_array()

    def initialize_value_index_array(self):
        values_index = dict()
        values_index["flesch_kincaid"] = self.FLESCH_KINCAID
        values_index["flesch_ease"] = self.FLESCH_EASE
        values_index["dale_chall"] = self.DALE_CHALL
        values_index["ari"] = self.ARI
        values_index["cli"] = self.CLI
        values_index["gunning_fog"] = self.GUNNING_FOG
        values_index["smog_all"] = self.SMOG
        values_index["smog"] = self.SMOG
        values_index["spache"] = self.SPACHE
        values_index["linsear_write"] = self.LINSEAR_WRITE
        return values_index

    def flesch_kincaid(self, content, error_ignore=True):
        try:
            record = dict()
            fk = self.readability.flesch_kincaid()
            record['score'] = fk.score
            record['grade_level'] = fk.grade_level
            content["flesch_kincaid"] = record
        except ReadabilityException as e:
            if not error_ignore:
                content["flesch_kincaid"] = str(e)
                print(e)

    def flesch_ease(self, content, error_ignore=True):
        try:
            record = dict()
            flesch_ease = self.readability.flesch()
            record['score'] = flesch_ease.score
            record['ease'] = flesch_ease.ease
            # key must match FLESCH_EASE's 'grade_level' entry, otherwise the
            # statistics pass counts this value as skipped
            record['grade_level'] = flesch_ease.grade_levels
            content['flesch_ease'] = record
        except ReadabilityException as e:
            if not error_ignore:
                content['flesch_ease'] = str(e)
                print(e)

    def dale_chall(self, content, error_ignore=True):
        try:
            record = dict()
            dale_chall = self.readability.dale_chall()
            record['score'] = dale_chall.score
            record['grade_level'] = dale_chall.grade_levels
            content['dale_chall'] = record
        except ReadabilityException as e:
            if not error_ignore:
                content['dale_chall'] = str(e)
                print(e)

    def automated_readability_index(self, content, error_ignore=True):
        try:
            record = dict()
            ari = self.readability.ari()
            record['score'] = ari.score
            record['grade_level'] = ari.grade_levels
            record['ages'] = ari.ages
            content['ari'] = record
        except ReadabilityException as e:
            if not error_ignore:
                content['ari'] = str(e)
                print(e)

    def coleman_liau_index(self, content, error_ignore=True):
        try:
            record = dict()
            coleman_liau = self.readability.coleman_liau()
            record['score'] = coleman_liau.score
            record['grade_level'] = coleman_liau.grade_level
            content['cli'] = record
        except ReadabilityException as e:
            if not error_ignore:
                content['cli'] = str(e)
                print(e)

    def gunning_fog_index(self, content, error_ignore=True):
        try:
            record = dict()
            gunning_fog = self.readability.gunning_fog()
            record['score'] = gunning_fog.score
            record['grade_level'] = gunning_fog.grade_level
            content['gunning_fog'] = record
        except ReadabilityException as e:
            if not error_ignore:
                content['gunning_fog'] = str(e)
                print(e)

    def smog(self, content, all_sentences=False, error_ignore=True):
        record = dict()
        try:
            if all_sentences:
                smog = self.readability.smog(all_sentences=all_sentences)
                record['score'] = smog.score
                record['grade_level'] = smog.grade_level
                content['smog_all'] = record
            else:
                smog = self.readability.smog()
                record['score'] = smog.score
                record['grade_level'] = smog.grade_level
                content['smog'] = record
        except ReadabilityException as e:
            if not error_ignore:
                if all_sentences:
                    content['smog_all'] = str(e)
                else:
                    content['smog'] = str(e)
                print(e)

    def spache_readability_formula(self, content, error_ignore=True):
        try:
            record = dict()
            spache = self.readability.spache()
            record['score'] = spache.score
            record['grade_level'] = spache.grade_level
            content['spache'] = record
        except ReadabilityException as e:
            if not error_ignore:
                content['spache'] = str(e)
                print(e)

    def linsear_write(self, content, error_ignore=True):
        try:
            record = dict()
            linsear_write = self.readability.linsear_write()
            record['score'] = linsear_write.score
            record['grade_level'] = linsear_write.grade_level
            content['linsear_write'] = record
        except ReadabilityException as e:
            if not error_ignore:
                content['linsear_write'] = str(e)
                print(e)

    @staticmethod
    def check_readability_from_file(input_json_file, output_json_file):
        result = []
        json_file = load_as_json(input_json_file)
        for record in json_file:
            analyser = ReadabilityAnalyser(record['text'])
            analysed_file_record = dict()
            analysed_file_record['file'] = record['file']
            analysed_file_record['category'] = record['category']
            analyser.flesch_kincaid(analysed_file_record)
            analyser.flesch_ease(analysed_file_record)
            analyser.dale_chall(analysed_file_record)
            analyser.automated_readability_index(analysed_file_record)
            analyser.coleman_liau_index(analysed_file_record)
            analyser.gunning_fog_index(analysed_file_record)
            analyser.smog(analysed_file_record)
            analyser.smog(analysed_file_record, True)
            analyser.spache_readability_formula(analysed_file_record)
            analyser.linsear_write(analysed_file_record)
            result.append(analysed_file_record)
        save_as_json(result, output_json_file)

    def check_readability(self, use_methods=None, errors_included=True):
        result_analysis = dict()
        if use_methods is None or 'flesch_kincaid' in use_methods:
            self.flesch_kincaid(result_analysis,
                                error_ignore=not errors_included)
        if use_methods is None or 'flesch_ease' in use_methods:
            self.flesch_ease(result_analysis, error_ignore=not errors_included)
        if use_methods is None or 'dale_chall' in use_methods:
            self.dale_chall(result_analysis, error_ignore=not errors_included)
        if use_methods is None or 'ari' in use_methods:
            self.automated_readability_index(result_analysis,
                                             error_ignore=not errors_included)
        if use_methods is None or 'cli' in use_methods:
            self.coleman_liau_index(result_analysis,
                                    error_ignore=not errors_included)
        if use_methods is None or 'gunning_fog' in use_methods:
            self.gunning_fog_index(result_analysis,
                                   error_ignore=not errors_included)
        if use_methods is None or 'smog' in use_methods:
            self.smog(result_analysis, error_ignore=not errors_included)
        if use_methods is None or 'smog_all' in use_methods:
            self.smog(result_analysis, True, error_ignore=not errors_included)
        if use_methods is None or 'spache' in use_methods:
            self.spache_readability_formula(result_analysis,
                                            error_ignore=not errors_included)
        if use_methods is None or 'linsear_write' in use_methods:
            self.linsear_write(result_analysis,
                               error_ignore=not errors_included)
        return result_analysis

    @staticmethod
    def initialize_basic_dict(categories, values, process_category=True):
        record = dict()
        for value in values:
            record = ReadabilityAnalyser.initialize_dict(record, value)
        if process_category:
            for category in categories:
                record[category] = ReadabilityAnalyser.initialize_basic_dict(
                    categories, values, False)
        return record

    @staticmethod
    def initialize_dict(record, value):
        record['min_' + value] = 999999999
        record['max_' + value] = -999999999
        record['sum_' + value] = 0
        record['avg_' + value] = 0
        record['freq_' + value] = 0
        record['skipped_' + value] = 0
        return record

    def initialize_values(self, statistic, categories):
        statistic["flesch_kincaid"] = self.initialize_basic_dict(
            categories, self.FLESCH_KINCAID)
        statistic["flesch_ease"] = self.initialize_basic_dict(
            categories, self.FLESCH_EASE)
        statistic["dale_chall"] = self.initialize_basic_dict(
            categories, self.DALE_CHALL)
        statistic["ari"] = self.initialize_basic_dict(categories, self.ARI)
        statistic["cli"] = self.initialize_basic_dict(categories, self.CLI)
        statistic["gunning_fog"] = self.initialize_basic_dict(
            categories, self.GUNNING_FOG)
        statistic["smog_all"] = self.initialize_basic_dict(
            categories, self.SMOG)
        statistic["smog"] = self.initialize_basic_dict(categories, self.SMOG)
        statistic["spache"] = self.initialize_basic_dict(
            categories, self.SPACHE)
        statistic["linsear_write"] = self.initialize_basic_dict(
            categories, self.LINSEAR_WRITE)
        statistic['indexes'] = [
            "flesch_kincaid", "flesch_ease", "dale_chall", "ari", "cli",
            "gunning_fog", "smog_all", "smog", "spache", "linsear_write"
        ]
        statistic['categories'] = categories

    @staticmethod
    def fill_min_max_sum_category(index, value_index, statistics,
                                  readability_index, category):
        if index[value_index] < statistics[readability_index][category][
                'min_' + value_index]:
            statistics[readability_index][category][
                'min_' + value_index] = index[value_index]
        if index[value_index] > statistics[readability_index][category][
                'max_' + value_index]:
            statistics[readability_index][category][
                'max_' + value_index] = index[value_index]
        statistics[readability_index][category]['sum_' + value_index] = \
            statistics[readability_index][category]['sum_' + value_index] + index[value_index]

    @staticmethod
    def fill_min_max_sum_category_value(value, value_index, statistics,
                                        readability_index, category):
        if value < statistics[readability_index][category]['min_' +
                                                           value_index]:
            statistics[readability_index][category]['min_' +
                                                    value_index] = value
        if value > statistics[readability_index][category]['max_' +
                                                           value_index]:
            statistics[readability_index][category]['max_' +
                                                    value_index] = value
        statistics[readability_index][category]['sum_' + value_index] = \
            statistics[readability_index][category]['sum_' + value_index] + value

    @staticmethod
    def fill_min_max_sum(index, value_index, statistics, readability_index):
        if index[value_index] < statistics[readability_index]['min_' +
                                                              value_index]:
            statistics[readability_index]['min_' +
                                          value_index] = index[value_index]
        if index[value_index] > statistics[readability_index]['max_' +
                                                              value_index]:
            statistics[readability_index]['max_' +
                                          value_index] = index[value_index]
        statistics[readability_index]['sum_' + value_index] = \
            statistics[readability_index]['sum_' + value_index] + index[value_index]

    @staticmethod
    def fill_min_max_sum_value(value, value_index, statistics,
                               readability_index):
        if value < statistics[readability_index]['min_' + value_index]:
            statistics[readability_index]['min_' + value_index] = value
        if value > statistics[readability_index]['max_' + value_index]:
            statistics[readability_index]['max_' + value_index] = value
        statistics[readability_index]['sum_' + value_index] = \
            statistics[readability_index]['sum_' + value_index] + value

    @staticmethod
    def cast_to_float(value):
        try:
            return float(value)
        except (ValueError, TypeError):
            return None

    def record_analysis(self, record, statistics):
        for readability_index in statistics['indexes']:
            if 'category' in record:
                category = record['category']
                if readability_index in record:
                    index = record[readability_index]
                    for value_index in self.values_index[readability_index]:
                        if value_index in index:
                            obtained_value = ReadabilityAnalyser.cast_to_float(
                                index[value_index])

                            if obtained_value is not None:
                                index[value_index] = obtained_value
                                ReadabilityAnalyser.fill_min_max_sum_category(
                                    index, value_index, statistics,
                                    readability_index, category)
                                if 'freq_' + value_index not in statistics[
                                        readability_index][category]:
                                    statistics[readability_index][category][
                                        'freq_' + value_index] = 0
                                statistics[readability_index][category]['freq_' + value_index] = \
                                    statistics[readability_index][category]['freq_' + value_index] + 1
                            elif isinstance(index[value_index], list):
                                for rec in index[value_index]:

                                    if value_index not in statistics[
                                            readability_index][category]:
                                        statistics[readability_index][
                                            category][value_index] = dict()
                                    if isinstance(rec, str):
                                        if 'freq_' + rec not in statistics[
                                                readability_index][category][
                                                    value_index]:
                                            statistics[readability_index][
                                                category][value_index]['freq_'
                                                                       +
                                                                       rec] = 0
                                        statistics[readability_index][category][value_index]['freq_' + rec] = \
                                            statistics[readability_index][category][value_index]['freq_' + rec] + 1
                                    else:
                                        ReadabilityAnalyser.fill_min_max_sum_category_value(
                                            rec, value_index, statistics,
                                            readability_index, category)
                                        if 'freq_' + value_index not in \
                                                statistics[readability_index][category]:
                                            statistics[readability_index][
                                                category]['freq_' +
                                                          value_index] = 0
                                        statistics[readability_index][category]['freq_' + value_index] = \
                                            statistics[readability_index][category]['freq_' + value_index] + 1
                            elif isinstance(index[value_index], str):
                                rec = index[value_index]
                                if value_index not in statistics[
                                        readability_index][category]:
                                    statistics[readability_index][category][
                                        value_index] = dict()
                                if 'freq_' + rec not in statistics[
                                        readability_index][category][
                                            value_index]:
                                    statistics[readability_index][category][
                                        value_index]['freq_' + rec] = 0
                                statistics[readability_index][category][value_index]['freq_' + rec] = \
                                    statistics[readability_index][category][value_index]['freq_' + rec] + 1
                            else:
                                print("Uncategorized: " +
                                      str(index[value_index]))

                            # each branch above maintains its own frequency
                            # counter; a second unconditional increment here
                            # double-counted numeric values
                        else:
                            statistics[readability_index][category]['skipped_' + value_index] = \
                                statistics[readability_index][category]['skipped_' + value_index] + 1
                else:
                    for value_index in self.values_index[readability_index]:
                        statistics[readability_index][category]['skipped_' + value_index] = \
                            statistics[readability_index][category]['skipped_' + value_index] + 1
            else:
                print("THIS: " + record)

            if readability_index in record:
                index = record[readability_index]
                for value_index in self.values_index[readability_index]:
                    if value_index in index:
                        obtained_value = ReadabilityAnalyser.cast_to_float(
                            index[value_index])

                        if obtained_value is not None:
                            index[value_index] = float(index[value_index])
                            ReadabilityAnalyser.fill_min_max_sum(
                                index, value_index, statistics,
                                readability_index)
                            if 'freq_' + value_index not in statistics[
                                    readability_index]:
                                statistics[readability_index]['freq_' +
                                                              value_index] = 0
                            statistics[readability_index]['freq_' + value_index] = \
                                statistics[readability_index]['freq_' + value_index] + 1
                        elif isinstance(index[value_index], list):
                            for rec in index[value_index]:
                                if value_index not in statistics[
                                        readability_index]:
                                    statistics[readability_index][
                                        value_index] = dict()

                                if isinstance(rec, str):
                                    # print(value_index + " " + str(index[value_index]))
                                    if 'freq_' + rec not in statistics[
                                            readability_index][value_index]:
                                        statistics[readability_index][
                                            value_index]['freq_' + rec] = 0
                                    statistics[readability_index][value_index]['freq_' + rec] = \
                                        statistics[readability_index][value_index]['freq_' + rec] + 1
                                else:
                                    ReadabilityAnalyser.fill_min_max_sum_value(
                                        rec, value_index, statistics,
                                        readability_index)
                                    if 'freq_' + value_index not in statistics[
                                            readability_index]:
                                        statistics[readability_index][
                                            'freq_' + value_index] = 0
                                    statistics[readability_index]['freq_' + value_index] = \
                                        statistics[readability_index]['freq_' + value_index] + 1
                        elif isinstance(index[value_index], str):
                            rec = index[value_index]
                            if value_index not in statistics[
                                    readability_index]:
                                statistics[readability_index][
                                    value_index] = dict()
                            if 'freq_' + rec not in statistics[
                                    readability_index][value_index]:
                                statistics[readability_index][value_index][
                                    'freq_' + rec] = 0
                            statistics[readability_index][value_index]['freq_' + rec] = \
                                statistics[readability_index][value_index]['freq_' + rec] + 1
                        else:
                            print("Uncategorized: " + str(index[value_index]))

                        # each branch above maintains its own frequency
                        # counter; no additional increment is needed here
                    else:
                        statistics[readability_index]['skipped_' + value_index] = \
                            statistics[readability_index]['skipped_' + value_index] + 1
            else:
                for value_index in self.values_index[readability_index]:
                    statistics[readability_index]['skipped_' + value_index] = \
                        statistics[readability_index]['skipped_' + value_index] + 1

    def count_average(self, statistics):
        for readability_index in statistics['indexes']:
            for value_index in self.values_index[readability_index]:
                if statistics[readability_index]['sum_' + value_index] != 0:
                    statistics[readability_index]['avg_' + value_index] = \
                        statistics[readability_index]['sum_' + value_index] / statistics[readability_index][
                            'freq_' + value_index]
                else:
                    statistics[readability_index]['avg_' + value_index] = 0
                for category in statistics['categories']:
                    if statistics[readability_index][category][
                            'sum_' + value_index] != 0:
                        statistics[readability_index][category]['avg_' + value_index] = \
                            statistics[readability_index][category]['sum_' + value_index] / \
                            statistics[readability_index][category]['freq_' + value_index]
                    else:
                        statistics[readability_index][category][
                            'avg_' + value_index] = 0

    def analyse_readability_file(self, readability_file, categories):
        statistic = dict()
        self.initialize_values(statistic, categories)

        file = load_as_json(readability_file)
        for record in file:
            self.record_analysis(record, statistic)

        self.count_average(statistic)
        return statistic

    def analyse_readability_file_save_results(self, readability_file,
                                              output_statistics_file,
                                              categories):
        statistics = self.analyse_readability_file(readability_file,
                                                   categories)
        save_as_json(statistics, output_statistics_file)
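
A minimal sketch of the single-document path (the text and method list are illustrative; the library's 100-word minimum still applies):

sample = "The index estimates the years of education a reader needs. " * 20
analyser = ReadabilityAnalyser(sample)
print(analyser.check_readability(use_methods=['flesch_kincaid', 'ari']))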
# The lists below collect one score per document; this fragment assumes os,
# Readability, a clean() helper, PATH and l_not_use are defined earlier in
# the original script.
l_flesch_kincaid = []
l_flesch = []
l_flesch_ease = []
l_gunning_fog = []
l_coleman_liau = []
l_dale_chall = []
l_ari = []
l_linsear_write = []
l_spache = []

for i in os.listdir(PATH):
    if not i.startswith('.'):
        if i not in l_not_use:
            with open(PATH + i, 'r') as f:
                text = f.read()
                r = Readability(clean(text))
                s1 = r.flesch_kincaid()
                s2 = r.flesch()
                s3 = r.gunning_fog()
                s4 = r.coleman_liau()
                s5 = r.dale_chall()
                s6 = r.ari()
                s7 = r.linsear_write()
                # r.smog()
                s8 = r.spache()
                l_flesch_kincaid.append(s1.score)
                l_flesch.append(s2.score)
                l_flesch_ease.append(s2.ease)
                l_gunning_fog.append(s3.score)
                l_coleman_liau.append(s4.score)
                l_dale_chall.append(s5.score)
                l_ari.append(s6.score)
                l_linsear_write.append(s7.score)
                l_spache.append(s8.score)
"""
-------------------------------------------------------------------------------------------------------
Scores for all tweets