def __init__(self):
    """Run the complete pipeline once, printing the result of every stage.

    Reads ``self.LINKS`` (passed to ``Crawler``) and ``self.STOPWORDS``
    (passed to ``Index``). Every intermediate object lives in a local
    variable only; nothing is stored on the instance.
    """
    # Stage 1: crawl and show the link structure.
    spider = Crawler(self.LINKS)
    visited_urls = spider.getVisited()
    link_graph = spider.getLinkStructure()
    print("Link-Struktur:\n")
    spider.printLinkStructure()

    # Stage 2: PageRank built from the link structure.
    ranker = PageRank(link_graph)
    rank_values = ranker.getPageRank()
    print("\n\nPageRanks:\n")
    ranker.printPageRank()

    # Stage 3: index built from the visited URLs.
    indexer = Index(self.STOPWORDS, visited_urls)
    term_index = indexer.getIndex()
    print("\n\nIndex:\n")
    indexer.printIndex()

    # Stage 4: scoring. The optional scorer.usePageRank(True) call stays
    # disabled here, exactly as it was commented out before.
    scorer = Scorer(rank_values, term_index, link_graph)
    print("\n\nDokumentenlängen:\n")
    scorer.printDocumentLengths()

    print("\n\nSuchergebnisse:\n")
    # Queries are evaluated in this fixed order.
    for query in (
        ["tokens"],
        ["index"],
        ["classification"],
        ["tokens", "classification"],
    ):
        scorer.calculateScores(query)
def main():
    """Crawl three seed pages and print each stage of the search pipeline.

    Prints, in order, the headings for link structure, PageRanks, index,
    document length and search results, calling the matching
    ``Crawler``/bank/``PageRank``/``Index``/``Scorer`` methods after each one.

    Output uses single-argument ``print(...)`` calls, which emit the same
    text on Python 2 and Python 3 (the former ``print '...'`` statements
    were a syntax error on Python 3).
    """
    # All seed documents share one directory; only the file name differs.
    base_url = (
        "http://people.f4.htw-berlin.de/fileadmin/user_upload/Dozenten/"
        "WI-Dozenten/Classen/DAWeb/smdocs/"
    )
    crawler = Crawler([
        base_url + "d01.html",
        base_url + "d06.html",
        base_url + "d08.html",
    ])
    crawler.crawl()

    bank = crawler.get_bank()
    bank.sortBank()

    print('\nLinkstruktur: \n')
    bank.printOutgoing()

    print('\nPageRanks:')
    # NOTE(review): the meaning of 0.95 and 0.04 is defined by PageRank's
    # constructor, which is not visible here; they are passed on unchanged.
    rank = PageRank(bank, 0.95, 0.04)
    rank.calculate()

    print('\n\nIndex: \n')
    index = Index(bank)
    index.printIndex()

    # The first scorer also provides the document-length report.
    scorer = Scorer('tokens', index)
    print('\nDokumentenlaenge: \n')
    scorer.printDocumentLength()

    print('\nSuchergebnisse: \n')
    scorer.printScoring()

    # Remaining queries, each scored with a fresh Scorer as before.
    for query in ('index', 'classification', 'tokens classification'):
        Scorer(query, index).printScoring()