Ejemplo n.º 1
0
 def setImages(self, html, url):
     """Collect the image sources found in ``html`` and hand them to ``__allImages``.

     The markup is stored with ``setHTML`` and read back with ``getHTML``.
     Each ``<img>`` tag that has a ``src`` attribute contributes one entry:
     when ``UrlUtils.containsHTTP`` returns ``False`` for the source, it is
     replaced by the result of ``UrlUtils.assertSiteWithFile(url, src)``;
     the value is then stripped of surrounding whitespace.

     Args:
         html: markup passed to ``setHTML``.
         url: address passed to ``UrlUtils.assertSiteWithFile`` for sources
             that ``containsHTTP`` rejects (presumably the page URL used to
             resolve relative links -- confirm against UrlUtils).
     """
     self.setHTML(html)
     images = []
     # Filter on src=True, mirroring the href=True filter used by setFiles:
     # a tag without the attribute would yield None from img.get('src'),
     # and the later .strip() call would fail on it.
     for img in self.getHTML().findAll('img', src=True):
         linkToImg = img['src']
         # Identity check against False kept as in the original, since the
         # return type of containsHTTP is not visible from here.
         if UrlUtils.containsHTTP(linkToImg) is False:
             linkToImg = UrlUtils.assertSiteWithFile(url, linkToImg)
         images.append(linkToImg.strip())
     self.__allImages(images)
Ejemplo n.º 2
0
 def setFiles(self, html, url):
     """Sort the anchors of ``html`` into external links and internal files.

     The markup is stored with ``setHTML``; every ``<a>`` tag that has an
     ``href`` is then examined:

     * if ``UrlUtils.externalLink(url, href)`` and
       ``UrlUtils.containsHTTP(href)`` both hold, the href is appended to
       ``self._externals``;
     * otherwise an href for which ``containsHTTP`` returns ``False`` is
       first replaced by ``UrlUtils.assertSiteWithFile(url, href)``, and
       the link is kept as internal only when
       ``ExtensionsFile.hasExtension`` accepts it and ``externalLink``
       rejects it.

     The collected internal links are finally passed to ``filterFiles``.

     Args:
         html: markup passed to ``setHTML``.
         url: address every href is compared with / resolved against.
     """
     self.setHTML(html)
     internal = []
     anchors = self.getHTML().findAll('a', href=True)
     for anchor in anchors:
         target = anchor['href']

         # External link carrying its own scheme: record it and move on.
         if UrlUtils.externalLink(url, target) and UrlUtils.containsHTTP(target):
             self._externals.append(target)
             continue

         # Links that containsHTTP rejects are rebuilt from the given url.
         if UrlUtils.containsHTTP(target) is False:
             target = UrlUtils.assertSiteWithFile(url, target)

         # Only links with an accepted extension are file candidates.
         if not ExtensionsFile.hasExtension(target):
             continue
         # Re-check on the (possibly rebuilt) link: it must not be external.
         if UrlUtils.externalLink(url, target):
             continue
         internal.append(target)
     self.filterFiles(internal)