def scanOnePage(self, entry):
    """Gather all links from one page.

    The recognition of links is delegated to the LinkSearch module through
    ``self.Finder``.

    Args:
        entry: Object whose ``Here`` attribute holds the URL of the page.

    Returns:
        Whatever ``self.Finder.find()`` returns for the fetched page, or
        ``None`` when the page is blacklisted, was already visited, or
        could not be fetched.
    """
    url = entry.Here
    parsed = urllib.parse.urlparse(url)

    # The blacklist is matched against host + path, without scheme or query.
    if LinkSearch.normalize(parsed.netloc + parsed.path) in self.Blacklist:
        return None
    if url in self.Visited:
        return None
    # Recorded before fetching, so a failing page is not retried later.
    self.Visited.append(url)

    try:
        response = requests.get(url)
        fetched = response.ok
    except Exception:
        # Best effort: any fetch failure marks the page unfetchable.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate so a running crawl can still be aborted.
        fetched = False

    if not fetched:
        self.Unfetchable.append(url)
        return None

    self.Finder.set_Page(response.text)
    self.Finder.set_Base(entry)
    return self.Finder.find()
def set_Base(self, base):
    """Store the processed host of *base* in ``self.nl``, then call the parent.

    ``self.nl`` receives the result of ``LinkSearch.normalize`` applied to
    the network location of ``base.Here``; afterwards the parent class's
    ``set_Base`` is invoked with the same *base*.
    """
    parsed_url = urllib.parse.urlparse(base.Here)
    host = parsed_url.netloc
    self.nl = LinkSearch.normalize(host)
    super().set_Base(base)
def setRoot(self, root):
    """Set the root URL and derive the domain and link finder from it.

    Stores *root* in ``self.Root``, stores the result of
    ``LinkSearch.normalize`` on its network location in ``self.Domain``,
    and creates the finder object in ``self.Finder``.
    """
    self.Root = root
    root_host = urllib.parse.urlparse(root).netloc
    self.Domain = LinkSearch.normalize(root_host)
    # The finder is a plain LinkSearch.LinkFinder. To use a specialised
    # descendant instead, instantiate it here, for example:
    #     self.Finder = Port5Search.Port5Finder(self.Domain)
    self.Finder = LinkSearch.LinkFinder(self.Domain)