Example #1
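This `get` method drives a small recursive crawler: it fetches the current URL, saves the response, then follows the page's CSS, JS, and anchor links, all bounded by global crawl-count and content-size limits.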
    def get(self):
        global total_data, crawl_count, crawled

        # Stop once the crawl budget (DEPTH_LIMIT pages) is exhausted.
        if crawl_count >= DEPTH_LIMIT:
            return False

        # Mark the URL as visited before fetching so recursive calls skip it.
        crawled.add(self.url)
        data = self.fetch()

        # Skip empty payloads (fetch() appears to return b' ' on failure).
        if data and data != bytearray(b' '):
            # Stop once the total downloaded content exceeds the byte limit.
            if total_data > CONTENT_LIMIT:
                return False

            total_data += len(data)
            crawl_count += 1
            webserver.save(self.url, self.root, self.type, data)

            # Parse the fetched document for further resources to crawl.
            s = Scraper(data, self.console)

            if self.type not in ["JS", "CSS"]:
                # Stylesheets referenced by this document.
                for link in s.get_css():
                    if link:
                        c = Crawler(link, self, "CSS", self.console)
                        if c.url not in crawled:
                            c.get()

                # Scripts referenced by this document.
                js_links = s.get_script()
                self.console.print(js_links)
                for link in js_links:
                    if link:
                        c = Crawler(link, self, "JS", self.console)
                        if c.url not in crawled:
                            c.get()

            # Anchor hrefs are followed only from HTML documents.
            if self.type == "HTML":
                for link in s.get_links():
                    if link:
                        c = Crawler(link, self, "HTML", self.console)
                        if c.url not in crawled:
                            c.get()
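A minimal sketch of how this method might be driven, assuming the Crawler(url, parent, type, console) signature implied by the recursive calls above. The entry URL, the limit values, the None parent for the seed crawler, and the rich Console are illustrative assumptions, not part of the original example; the globals must live in the same module as Crawler for the global statement to see them.

    # Hypothetical driver, mirroring the globals the method expects.
    from rich.console import Console  # assumed: self.console.print() suggests rich

    DEPTH_LIMIT = 50            # illustrative page budget
    CONTENT_LIMIT = 10_000_000  # illustrative byte budget
    total_data = 0
    crawl_count = 0
    crawled = set()

    # Seed the crawl with an HTML page; get() recurses into linked resources.
    root = Crawler("https://example.com", None, "HTML", Console())
    root.get()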