def calculateInverseDocumentFrequency(self):
    """Return the inverse document frequency of each important word.

    Every tokenized sentence of the article is treated as one "document".
    For a word, the value is::

        len(sentences) / number_of_sentences_containing_the_word

    Returns:
        dict: word -> float. Words are the items returned by
        ``processStopwords(self.newsArticle)``; a word that is found in no
        sentence is left out of the result. Keys appear in the order the
        words are first seen in that word list.
    """
    wordList = processStopwords(self.newsArticle)
    sentences = self.getTokenizedSentences()
    sentenceCount = len(sentences)

    inverseDocumentFrequencyDictionary = {}
    # dict.fromkeys() de-duplicates while keeping first-seen order, so a word
    # that is repeated in the article is counted once per sentence instead of
    # once per (repetition, sentence) pair, which would shrink its IDF.
    for word in dict.fromkeys(wordList):
        # NOTE(review): `in` is a case-sensitive containment test; if the
        # sentences are plain strings it also matches inside longer words.
        # scoreSentences() lowercases the sentence before the same test --
        # confirm which behaviour is intended here.
        sentencesWithWord = sum(1 for sentence in sentences if word in sentence)
        if sentencesWithWord:
            inverseDocumentFrequencyDictionary[word] = (
                sentenceCount / sentencesWithWord)
    return inverseDocumentFrequencyDictionary
def scoreSentences(self) -> dict:
    """Score each sentence by the frequencies of the important words in it.

    For every tokenized sentence, the article-wide counts of all important
    words found in the lowercased sentence are added up, and the total is
    floor-divided by the number of words TextBlob reports for the sentence.

    Returns:
        dict: maps the first 10 characters of a sentence to its integer
        score. Sentences that share the same 10-character prefix share one
        entry: the later sentence adds its total to the stored score before
        dividing by its own word count. A sentence containing none of the
        counted words scores 0 (unless it shares an entry as described).
    """
    freqTable = Counter(processStopwords(self.newsArticle))
    sentences = self.getTokenizedSentences()
    sentenceValue = dict()

    for sentence in sentences:
        sentenceKey = sentence[:10]
        word_count_in_sentence = len(TextBlob(sentence).words)
        # Lowercase once per sentence rather than once per counted word.
        loweredSentence = sentence.lower()

        matchedFrequency = sum(
            frequency
            for wordValue, frequency in freqTable.items()
            if wordValue in loweredSentence)

        if word_count_in_sentence == 0:
            # Nothing to normalise by: register the sentence with a zero
            # score (without disturbing an entry that already exists for
            # this prefix) instead of raising ZeroDivisionError.
            sentenceValue.setdefault(sentenceKey, 0)
            continue

        # .get(..., 0) keeps a sentence with no matching words from raising
        # KeyError, and preserves the accumulate-then-divide behaviour for
        # sentences whose prefix is already present.
        sentenceValue[sentenceKey] = (
            sentenceValue.get(sentenceKey, 0) + matchedFrequency
        ) // word_count_in_sentence

    return sentenceValue
def calculateTermFrequency(self):
    """Return a frequency value for each important word of the article.

    For every item of ``processStopwords(self.newsArticle)`` the number of
    tokenized sentences that contain it is added to that word's counter
    (so a word listed several times is added several times). Each counter
    is then divided by ``len()`` of the article returned by
    ``self.newsArticle.getArticle()``.

    Returns:
        dict | None: word -> float for every word found in at least one
        sentence, in order of first appearance in the word list. ``None``
        is returned, after printing a notice, when the article is ``""``.
    """
    # Fetch the article once; it is needed for the guard and as the divisor.
    article = self.newsArticle.getArticle()
    if article == "":
        print("No Article from calculateTermFrequency")
        return None

    sentences = self.getTokenizedSentences()
    docWordList = processStopwords(self.newsArticle)

    # NOTE(review): the article is compared with "" above, so it is
    # presumably a string and this divisor is a character count rather
    # than a word count -- confirm that this normalisation is intended.
    articleLength = len(article)

    termCounterDictionary = dict()
    for word in docWordList:
        sentencesWithWord = sum(1 for sentence in sentences if word in sentence)
        if sentencesWithWord:
            termCounterDictionary[word] = (
                termCounterDictionary.get(word, 0) + sentencesWithWord)

    return {
        word: count / articleLength
        for word, count in termCounterDictionary.items()
    }
# Per-article feature extraction. The names heading, article, given_summary,
# generated_summary and line_count are defined outside this fragment.

# Bundle the current heading, article text and reference summary.
baseNewsArticle = BaseNewsArticle(heading=heading, article=article, summary=given_summary)

# Echo the raw record, then the summary held in generated_summary.
print(
    f'Article #{line_count}\nHeading:\n{heading} \n\nSummary:\n{given_summary} '
    f'\n\nArticle:\n{article}\n')
print(
    f'Summary using library:\n{convertListToString(generated_summary)}\n'
)

processSentences = ProcessSentences(baseNewsArticle)

# Size features. The split(" ") counts are space-separated chunks of the
# stripped text, so an empty heading or summary still yields 1.
lengthOfHeading = len(heading.strip().split(" "))
numberOfWordsInArticle = baseNewsArticle.getTotalNumberOfWords()
numberOfImpWords = len(processStopwords(baseNewsArticle))
numberOfSentences = len(
    processSentences.getTokenizedSentences())

# Per-word statistics, each passed through getDictionaryAsString together
# with the Type member naming the statistic.
# NOTE(review): calculateTermFrequency() returns None when the article text
# is "" -- confirm getDictionaryAsString accepts None.
termFrequency = getDictionaryAsString(
    processSentences.calculateTermFrequency(), Type.TERM_FREQUENCY)
inverseDocumentFrequency = getDictionaryAsString(
    processSentences.calculateInverseDocumentFrequency(),
    Type.INVERSE_DOCUMENT_FREQUENCY)
termUniqueness = getDictionaryAsString(
    processSentences.calculateTermUniqueness(), Type.TERM_UNIQUENESS)

# Total word count minus the number of words processStopwords returned.
numberOfStopWordsRemoved = numberOfWordsInArticle - numberOfImpWords
lengthOfGivenSummary = len(given_summary.strip().split(" "))

# NOTE(review): the meaning of the 1.5 argument is not visible here;
# presumably a score threshold or multiplier -- confirm in generate_summary.
medium_summary = processSentences.generate_summary(1.5)