def compileAnalytics(self):
    """Run the configured paragraph tests over every record in the ratings file.

    Reads all lines of ``self.ratingsFile``.  A record starts with a header
    line that is non-empty, at most three characters long and does not begin
    with "-" (presumably the paragraph's rating -- confirm against the file
    format).  The header is followed by ";"-separated sentence lines up to
    the next blank line (or the end of the file).  Field 0 of each sentence
    line is collected as the sentence id and field 4 as the sentence text;
    field 2 of the first sentence line is used as the paragraph id.

    For each record ``self.incrementTotalNumber(header)`` is called, then the
    test numbers in ``self.tests`` are run in order.  The first test that
    returns ``False`` calls ``self.incrementRejectionNumber(header)`` and
    stops testing that record.  Test numbers outside 1-9 are skipped.

    Raises:
        IndexError: if a sentence line has fewer ";"-separated fields than
            the positions read above.
    """
    lines = self.ratingsFile.readlines()
    numLines = len(lines)
    i = 0
    # A header on the very last line would have no sentence lines after it,
    # so the scan for headers stops one line early.
    while i < numLines - 1:
        line = lines[i].strip()
        if line and len(line) <= 3 and line[0] != "-":
            self.incrementTotalNumber(line)
            i += 1
            paragraphID = lines[i].strip().split(";")[2]
            paragraph = []
            sentenceIds = []
            # Bounds check comes first so that the final line of a file
            # without a trailing blank line is still part of the paragraph.
            while i < numLines and lines[i].strip():
                fields = lines[i].strip().split(";")
                paragraph.append(fields[4])
                sentenceIds.append(fields[0])
                i += 1
            for testNum in self.tests:
                if testNum == 1:
                    passed = checkStartingWordRepetition_test1(paragraph)
                elif testNum == 2:
                    passed = checkLengthVariation_test2(paragraph)
                elif testNum == 3:
                    if paragraphID not in POSCache:
                        partOfSpeechMap = compilePartOfSpeechMap(paragraphID)
                        POSCache[paragraphID] = partOfSpeechMap
                    else:
                        partOfSpeechMap = POSCache[paragraphID]
                    passed = checkModifierCount_test3(paragraph, partOfSpeechMap)
                elif testNum == 4:
                    passed = checkPhraseRepetition_test4(paragraph)
                elif testNum == 5:
                    passed = checkFirstSentence_test5(paragraphID, paragraph)
                elif testNum == 6:
                    passed = self.checkBasicWordProportion_test6(paragraph, paragraphID)
                elif testNum == 7:
                    passed = self.checkTopSyntacticRule_test7(paragraph, sentenceIds)
                elif testNum == 8:
                    passed = self.checkSyntacticNgrams_test8(paragraph, sentenceIds)
                elif testNum == 9:
                    passed = self.checkSyntacticLengthVariation_test9(paragraph, sentenceIds)
                else:
                    # Unknown test number: nothing to run.  Skipping avoids
                    # reading an unbound or stale ``passed`` value.
                    continue
                # Explicit comparison kept on purpose: only a False result
                # counts as a failure, a None result does not.
                if passed == False:
                    self.incrementRejectionNumber(line)
                    if line[0] == "3":
                        # Debug output for rejected records whose header
                        # starts with "3".  Single-argument print(...) gives
                        # the same output on Python 2 and Python 3.
                        print("testNum failed: " + str(testNum))
                        print(paragraph)
                    break
        i += 1
def getSynset(token, paragraphID):
    """Return the "01" synset of *token* for its tagged word category.

    The part-of-speech map for *paragraphID* is taken from ``POSCache``; on
    a cache miss it is built with ``compilePartOfSpeechMap`` and stored.
    The category recorded for *token* selects the synset part-of-speech tag:
    "Nouns" -> "n", "Verbs" -> "v", "Modifiers" -> "a".  The synset name
    ``<token>.<tag>.01`` is then passed to ``wn.synset`` (presumably NLTK's
    WordNet reader -- confirm against the file's imports).

    Returns:
        The object returned by ``wn.synset``, or ``False`` when the token's
        category is none of the three above or the synset lookup fails.

    Note:
        The token is looked up with ``partOfSpeechMap[token]``, so a token
        missing from a plain-dict map raises ``KeyError``.
    """
    if paragraphID not in POSCache:
        partOfSpeechMap = compilePartOfSpeechMap(paragraphID)
        POSCache[paragraphID] = partOfSpeechMap
    else:
        partOfSpeechMap = POSCache[paragraphID]

    synsetTags = {"Nouns": "n", "Verbs": "v", "Modifiers": "a"}
    pos = synsetTags.get(partOfSpeechMap[token])
    if pos is None:
        return False

    synsetStr = token + "." + pos + "." + "01"
    try:
        return wn.synset(synsetStr)
    except Exception:
        # Best effort: a failed lookup means "no synset".  Catching Exception
        # instead of using a bare except lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
def checkBasicWordProportion_test6(self, paragraph, paragraphID):
    """Check that the paragraph does not lean too heavily on basic words.

    The part-of-speech map for *paragraphID* is taken from ``POSCache``; on
    a cache miss it is built with ``compilePartOfSpeechMap`` and stored.

    Computation:
      * ``numGivenWords``: entries of the map whose category is not
        "Basic Words".
      * Every whitespace-separated token of every sentence in *paragraph* is
        counted; a token is basic when its lower-cased form is in
        ``self.basicWords``.
      * The proportion is basic / non-basic token count, or the total token
        count when there are no non-basic tokens.
      * The paragraph fails when proportion / ``numGivenWords`` > 0.095.

    When the map has no non-"Basic Words" entry the division is undefined
    (it used to raise ZeroDivisionError); the paragraph then passes only if
    the proportion itself is 0.

    Returns:
        ``True`` if the paragraph passes, ``False`` otherwise.
    """
    if paragraphID not in POSCache:
        partOfSpeechMap = compilePartOfSpeechMap(paragraphID)
        POSCache[paragraphID] = partOfSpeechMap
    else:
        partOfSpeechMap = POSCache[paragraphID]

    numGivenWords = 0
    for word in partOfSpeechMap:
        if partOfSpeechMap[word] != "Basic Words":
            numGivenWords += 1

    numWords = 0
    numBasicWords = 0
    for sentence in paragraph:
        # split() already drops all surrounding whitespace from each token.
        for word in sentence.split():
            numWords += 1
            if word.lower() in self.basicWords:
                numBasicWords += 1

    numOtherWords = numWords - numBasicWords
    if numOtherWords != 0:
        basicWordsProportion = float(numBasicWords) / numOtherWords
    else:
        # Every token is basic (or there are none): use the token count as
        # the proportion, as there is nothing to divide by.
        basicWordsProportion = float(numWords)

    if numGivenWords == 0:
        # Nothing to normalise by: any basic-word usage exceeds the limit.
        return basicWordsProportion == 0
    return not (basicWordsProportion / numGivenWords > 0.095)