Beispiel #1
0
 def scanOnePage(self, entry):
     """Fetch one page and return the links found on it.

     The recognition of links is delegated to ``self.Finder``.

     Args:
         entry: object whose ``Here`` attribute holds the URL of the page.

     Returns:
         The result of ``self.Finder.find()`` for the fetched page, or
         ``None`` when the page is skipped (blacklisted or already visited)
         or could not be fetched.

     Side effects:
         Appends ``entry.Here`` to ``self.Visited`` the first time the page
         is attempted, and to ``self.Unfetchable`` when the download fails
         or the response status is not OK.
     """
     url = entry.Here
     parts = urllib.parse.urlparse(url)
     # The blacklist is matched against host + path only (no scheme/query).
     if LinkSearch.normalize(parts.netloc + parts.path) in self.Blacklist:
         return None
     if url in self.Visited:
         return None
     # Mark as visited before fetching so a failing page is not retried.
     self.Visited.append(url)
     # NOTE(review): no timeout is passed to requests.get, so a stalled
     # server can block the crawl indefinitely -- consider adding one.
     try:
         response = requests.get(url)
         fetched = response.ok
     except Exception:
         # Best effort: any fetch error marks the page as unfetchable.
         # Deliberately not a bare ``except`` so that KeyboardInterrupt and
         # SystemExit still propagate and can stop the crawl.
         fetched = False
     if not fetched:
         self.Unfetchable.append(url)
         return None
     self.Finder.set_Page(response.text)
     self.Finder.set_Base(entry)
     return self.Finder.find()
 def set_Base(self, base):
     """Record the base entry's normalized host, then defer to the parent.

     Stores the normalized network location of ``base.Here`` in ``self.nl``
     before calling the inherited ``set_Base`` with the same ``base``.
     """
     host = urllib.parse.urlparse(base.Here).netloc
     self.nl = LinkSearch.normalize(host)
     super().set_Base(base)
Beispiel #3
0
 def setRoot(self, root):
     """Store the crawl root and create the link finder for its domain.

     Sets ``self.Root`` to ``root``, ``self.Domain`` to the normalized
     network location parsed from ``root``, and ``self.Finder`` to a
     ``LinkSearch.LinkFinder`` constructed with that domain.
     """
     self.Root = root
     netloc = urllib.parse.urlparse(root).netloc
     self.Domain = LinkSearch.normalize(netloc)
     # To use specialised link handling, instantiate your own LinkFinder
     # subclass here instead, e.g. Port5Search.Port5Finder(self.Domain).
     self.Finder = LinkSearch.LinkFinder(self.Domain)