-
Notifications
You must be signed in to change notification settings - Fork 0
/
ReadabilityCalculator.py
115 lines (101 loc) · 4.1 KB
/
ReadabilityCalculator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from nltk.corpus import cmudict
from curses.ascii import isdigit
import re
def textAnalyzer(text, tokens):
analyzedVars = {}
analyzedVars['words'] = tokens
analyzedVars['charCount'] = float(CharCount(tokens))
wordCount= float(len(tokens))
analyzedVars['wordCount'] = float(len(tokens))
sentenceCount=SentenceCount(text)
analyzedVars['sentenceCount'] = float(SentenceCount(text))
#analyzedVars['syllableCount'] = float(SyllableCalculator(text))
#analyzedVars['complexwordCount'] = float(complexwordsCount)
if float(sentenceCount)>0:
averageWordsPerSentence = float(float(wordCount)/float(sentenceCount))
else:
averageWordsPerSentence = 0
analyzedVars['averageWordsPerSentence'] = float(averageWordsPerSentence)
return analyzedVars
def SyllableCalculator(text):
d = cmudict.dict()
counter = 0.0
tokens = re.findall("[A-Z]{2,}(?![a-z])|[A-Z][a-z]+(?=[A-Z])|[\'\w\-]+",text)
for token in tokens:
count = 1.0
if token.lower() in d.keys():
count =max([len(list(y for y in x if isdigit(y[-1]))) for x in d[token.lower()]])
counter = counter+count
return counter
def SentenceCount(text):
matches = re.findall('[^.!?]+[.!?]',text)
return len(matches)
def CharCount(tokens):
count = 0.0
for token in tokens:
count= count+ len(token)
return float(count)
def GunningFogIndex(text,analyzedVars):
if analyzedVars['wordCount']>0:
score = 0.4 * ((analyzedVars['averageWordsPerSentence']) + (100 * (analyzedVars['complexwordCount']/analyzedVars['wordCount'])))
else:
score = 0
return round(score, 4)
def ARI(text,analyzedVars):
if analyzedVars['wordCount']>0 and analyzedVars['sentenceCount']>0:
score = 4.71 * (analyzedVars['charCount'] / analyzedVars['wordCount']) + 0.5 * (analyzedVars['wordCount'] / analyzedVars['sentenceCount']) - 21.43
else:
score = 0
return score
def FleschReadingEase(text,analyzedVars):
score = 0.0
if analyzedVars['wordCount']>0:
score = 206.835 - (1.015 * (analyzedVars['averageWordsPerSentence'])) - (84.6 * (analyzedVars['syllableCount']/ analyzedVars['wordCount']))
else:
score = 0
return round(score, 4)
def FleschKincaidGradeLevel(text,analyzedVars):
if analyzedVars['wordCount']>0:
score = 0.39 * (analyzedVars['averageWordsPerSentence']) + 11.8 * (analyzedVars['syllableCount']/ analyzedVars['wordCount']) - 15.59
else:
score = 0
return round(score, 4)
def SMOGIndex(text, analyzedVars):
if analyzedVars['sentenceCount']>0:
score = (math.sqrt(analyzedVars['complexwordCount']*(30/analyzedVars['sentenceCount'])) + 3)
else:
score = 0
return score
def ColemanLiauIndex(text, analyzedVars):
if analyzedVars['wordCount']>0:
score = (5.89*(analyzedVars['charCount']/analyzedVars['wordCount']))-(30*(analyzedVars['sentenceCount']/analyzedVars['wordCount']))-15.8
else:
score = 0
return round(score, 4)
def RIX(text, analyzedVars):
score = 0.0
longwords = 0.0
for word in analyzedVars['words']:
if len(word) >= 7:
longwords += 1.0
if analyzedVars['sentenceCount']>0:
score = longwords / analyzedVars['sentenceCount']
else:
score = 0
return score
def extractReadabilityArray(text, tokens):
readArray = []
analyzedVars = textAnalyzer(text, tokens)
readArray.append(analyzedVars['charCount'])
readArray.append(analyzedVars['sentenceCount'])
#readArray.append(analyzedVars['syllableCount'])
readArray.append(analyzedVars['averageWordsPerSentence'])
readArray.append(float(ARI(text,analyzedVars)))
readArray.append(float(RIX(text,analyzedVars)))
readArray.append(float(ColemanLiauIndex(text,analyzedVars)))
#readArray.append(SMOGIndex(text,analyzedVars))
#readArray.append(float(FleschKincaidGradeLevel(text,analyzedVars)))
#readArray.append(float(FleschReadingEase(text,analyzedVars)))
#readArray.append(GunningFogIndex(text,analyzedVars))
#print readArray
return readArray