Example #1
    def crawl(self, url, fun, deep=5):
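        """Crawl breadth-first from url, up to `deep` levels.

        `fun` is called once for every page that is fetched.
        """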
        page = TylCrawlerPage(url=url)
        fetcher = TylCrawlerFetcher()
        pages = [[page]]

        try:
            for i in range(deep):
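                # pages[i] is the frontier for the current level; pn collects
                # the newly discovered pages for level i + 1.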
                pn = []
                for p in pages[i]:
                    fetcher.fetch(p)
                    p.fetcher = fetcher
                    links = p.getLinks(self.host)
                    fun(p)

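                    # Optional politeness delay between requests; assumes
                    # "import time" at module level.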
                    if hasattr(self, "sleepSec"):
                        time.sleep(self.sleepSec)

                    # Do not gather child links from the deepest level.
                    if i + 1 == deep:
                        continue
                    for link in links:

                        link = self.fixUrl(link)

                        if self.urlCrawled(link):
                            continue
                        self.crawledList.append(link)
                        pchild = TylCrawlerPage(url=link)
                        pchild.setReferer(p.url)
                        pchild.cookieJar = p.cookieJar
                        pchild.level = i+1
                        pn.append(pchild)
                        # print(link)  # debug
                pages.append(pn)
        except ValueError as e:
            print(e)
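
For context, a minimal usage sketch follows. The enclosing class name (TylCrawler here), its constructor, and the host value are assumptions; only crawl(), TylCrawlerPage, and TylCrawlerFetcher appear in the snippet above, and the class is assumed to provide fixUrl(), urlCrawled(), crawledList, and host.

# Minimal usage sketch. "TylCrawler" and its constructor are hypothetical;
# only the crawl() method itself is shown in the example above.
def print_url(page):
    # Callback passed as `fun`; invoked once per fetched page.
    print(page.url)

crawler = TylCrawler(host="example.com")  # hypothetical constructor
crawler.sleepSec = 1                      # optional delay honored by crawl()
crawler.crawl("http://example.com/", print_url, deep=3)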