Ejemplo n.º 1
0
if __name__ == '__main__':
    # Command-line entry point.
    #
    # Expects exactly one positional argument (a URL). The page is handed to
    # PageParser, and the extracted text is fed into PageTopicAnalyzer
    # instances whose results are then combined.
    if sys.version_info[0] < 3:
        # Python 2 only: reloading ``sys`` restores setdefaultencoding(),
        # which the interpreter removes at start-up, so implicit
        # str/unicode conversions use UTF-8. Python 3 has no builtin
        # reload() and no sys.setdefaultencoding(), and its default
        # encoding is already UTF-8, so the hack is skipped there.
        reload(sys)  # noqa: F821 - builtin on Python 2
        sys.setdefaultencoding('UTF8')

    if len(sys.argv) < 2:
        print("URL missing! Please try again.")
    elif len(sys.argv) > 2:
        # sys.argv[0] is the script name, so the number of user-supplied
        # arguments is len(sys.argv) - 1. Report the real count instead of
        # a hard-coded "Two", which was wrong for three or more arguments.
        print(
            "The program takes exactly one argument. {} received. "
            "Please try again.".format(len(sys.argv) - 1)
        )
    else:
        url = sys.argv[1]
        parser = PageParser(url)

        # Pull the three text views of the page from the parser.
        allText = parser.getAllText()
        titleText = parser.getTitle()
        headingText = parser.getHeadings()

        # One analyzer for the whole page text, one for the title only.
        allAnalyzer = PageTopicAnalyzer(allText)
        titleAnalyzer = PageTopicAnalyzer(titleText)

        ## Unigram ##
        allAnalyzer.unigram()
        # NOTE(review): this section is labelled "Unigram" but the title
        # analyzer is processed with bigram() - confirm this is intended.
        titleAnalyzer.bigram()
        # Presumably boosts the title's counts by a factor of 20 so that
        # title terms dominate the combined result - TODO confirm against
        # PageTopicAnalyzer.weighted().
        titleAnalyzer.weighted(20)
        # Combine both analyzers through PageTopicAnalyzer's "+" operator.
        uni_analyzer = allAnalyzer + titleAnalyzer