def setFiles(self, html, url): self.setHTML(html) internalList = [] for files in self.getHTML().findAll('a', href=True): linkToFile = files['href'] if UrlUtils.externalLink(url, linkToFile) and UrlUtils.containsHTTP(linkToFile): self._externals.append(linkToFile) else: if UrlUtils.containsHTTP(linkToFile) is False: linkToFile = UrlUtils.assertSiteWithFile(url, linkToFile) if ExtensionsFile.hasExtension(linkToFile) and not UrlUtils.externalLink(url, linkToFile): internalList.append(linkToFile) self.filterFiles(internalList)
def pageOrExternal(self, page, url): if url in page: return True elif UrlUtils.externalLink(url, page): self.setExternals(page) return False