if __name__ == '__main__':
    # Script entry point: expects exactly one command-line argument (a URL),
    # parses that page and builds word-count analyzers for its text and title.

    # Python 2 idiom: reloading sys makes sys.setdefaultencoding available
    # again so the process-wide default string encoding can be set to UTF-8.
    reload(sys)
    sys.setdefaultencoding('UTF8')

    # Number of user-supplied arguments; sys.argv[0] is the script itself.
    arg_count = len(sys.argv) - 1

    if arg_count < 1:
        print("URL missing! Please try again.")
    elif arg_count > 1:
        # Report the real count: this branch is taken for two OR MORE
        # arguments, so a hard-coded "Two received" would be wrong for 3+.
        print(
            "The program takes exactly one argument. %d received. "
            "Please try again." % arg_count
        )
    else:
        url = sys.argv[1]
        parser = PageParser(url)

        # Pull the three text sources out of the parsed page.
        allText = parser.getAllText()
        titleText = parser.getTitle()
        headingText = parser.getHeadings()

        # One analyzer over the whole page text, one over the title only.
        allAnalyzer = PageTopicAnalyzer(allText)
        titleAnalyzer = PageTopicAnalyzer(titleText)

        ## Unigram ##
        allAnalyzer.unigram()
        # NOTE(review): this calls bigram() inside the unigram section while
        # allAnalyzer uses unigram() -- possibly a copy/paste slip; confirm
        # against PageTopicAnalyzer before changing.
        titleAnalyzer.bigram()
        # presumably scales the title's counts by 20 so title terms outweigh
        # body terms -- TODO confirm against PageTopicAnalyzer.weighted
        titleAnalyzer.weighted(20)

        # Combine both analyzers via PageTopicAnalyzer's `+` operator.
        uni_analyzer = allAnalyzer + titleAnalyzer