def __init__(self):
        """Run the complete demo pipeline and print each stage's output.

        The stages are executed in a fixed order:

        1. A ``Crawler`` is built from ``self.LINKS``; its visited URLs and
           link structure are fetched and the link structure is printed.
        2. A ``PageRank`` is built from that link structure and printed.
        3. An ``Index`` is built from ``self.STOPWORDS`` and the visited URLs
           and printed.
        4. A ``Scorer`` is built from the page ranks, the index and the link
           structure; document lengths are printed and four fixed queries
           are passed to ``calculateScores``.

        The headings written to stdout are German and are kept verbatim.
        """
        # --- Stage 1: crawler -------------------------------------------
        crawler = Crawler(self.LINKS)
        visited_urls = crawler.getVisited()
        link_graph = crawler.getLinkStructure()
        print("Link-Struktur:\n")
        crawler.printLinkStructure()

        # --- Stage 2: PageRank ------------------------------------------
        ranker = PageRank(link_graph)
        rank_values = ranker.getPageRank()
        print("\n\nPageRanks:\n")
        ranker.printPageRank()

        # --- Stage 3: index ---------------------------------------------
        indexer = Index(self.STOPWORDS, visited_urls)
        term_index = indexer.getIndex()
        print("\n\nIndex:\n")
        indexer.printIndex()

        # --- Stage 4: scoring -------------------------------------------
        scorer = Scorer(rank_values, term_index, link_graph)
        # NOTE: scorer.usePageRank(True) is not enabled for this run.
        print("\n\nDokumentenlängen:\n")
        scorer.printDocumentLengths()

        print("\n\nSuchergebnisse:\n")
        # Each query is a list of search terms; they are scored one after
        # another in exactly this order.
        queries = (
            ["tokens"],
            ["index"],
            ["classification"],
            ["tokens", "classification"],
        )
        for terms in queries:
            scorer.calculateScores(terms)
# Example #2
# 0
def main():
	"""Run the demo pipeline for three fixed seed pages and print the results.

	Steps, in order: crawl the seed URLs, sort the resulting bank and print
	its outgoing links, run the PageRank calculation, build and print the
	index, print the document lengths, and finally print the scoring for
	four fixed queries.

	All headings are written with the single-argument ``print(...)`` form,
	which produces the same output on Python 2 as the ``print`` statement
	and is also valid syntax on Python 3.
	"""
	# All seed documents live in the same directory; only the file name
	# differs, so the common prefix is spelled out once.
	base_url = (
		"http://people.f4.htw-berlin.de/fileadmin/user_upload/Dozenten/"
		"WI-Dozenten/Classen/DAWeb/smdocs/"
	)
	seed_urls = [base_url + page for page in ("d01.html", "d06.html", "d08.html")]

	crawler = Crawler(seed_urls)
	crawler.crawl()

	bank = crawler.get_bank()
	bank.sortBank()

	print('\nLinkstruktur: \n')
	bank.printOutgoing()

	print('\nPageRanks:')
	# NOTE(review): 0.95 and 0.04 are presumably the damping factor and a
	# second tuning parameter (teleport share or convergence threshold) --
	# confirm against the PageRank class.
	rank = PageRank(bank, 0.95, 0.04)
	# Nothing is printed here explicitly; presumably calculate() emits the
	# ranks itself -- verify against PageRank.calculate.
	rank.calculate()

	print('\n\nIndex: \n')
	index = Index(bank)
	index.printIndex()

	# The first scorer is created before the headings because it is used
	# both for the document lengths and for the first query's scoring.
	scorer = Scorer('tokens', index)

	print('\nDokumentenlaenge: \n')
	scorer.printDocumentLength()

	print('\nSuchergebnisse: \n')
	scorer.printScoring()
	# Remaining queries: a fresh Scorer per query, scored in this order.
	for query in ('index', 'classification', 'tokens classification'):
		Scorer(query, index).printScoring()