def list_chapters(self, data):
    url = self.get_series_url(data)
    content = urlretrieve(url)
    doc = ET.HTML(content)
    chapters = self._list_chapters(doc)
    # Collect the pagination links so every listing page gets scraped;
    # a set avoids fetching the same page twice.
    pages = set(n.attrib['href']
                for n in doc.xpath("//ul[@class='pgg']/li/a"))
    for url in pages:
        content = urlretrieve(url)
        doc = ET.HTML(content)
        chapters += self._list_chapters(doc)
    chapters.sort(lambda a, b: smart_cmp(a['chapter_label'],
                                         b['chapter_label']))
    return chapters
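# NOTE: these methods call a module-level urlretrieve() helper that is not
# shown in this excerpt.  The stdlib urllib urlretrieve() downloads to a
# file and returns a (filename, headers) tuple, and takes no `headers`
# keyword, so the helper here is presumably a local wrapper that returns
# the response body.  A minimal sketch under that assumption (Python 2,
# to match the cmp-style sort() calls used throughout):

import urllib2

def urlretrieve(url, headers=None):
    # Fetch `url` and return the raw response body, passing along any
    # extra HTTP headers (e.g. a User-Agent) given by the caller.
    req = urllib2.Request(url, headers=headers or {})
    return urllib2.urlopen(req).read()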
def list_chapters(self, data):
    url = self.get_series_url(data)
    content = urlretrieve(url)
    doc = ET.HTML(content)
    chapters = self._list_chapters(doc)
    # Follow every search-result pagination link for this series; the
    # hrefs are site-relative, so prepend the base URL.  Restricting the
    # XPath to a[@href] skips anchors that carry no href attribute.
    pages = [self.baseurl + n.attrib['href']
             for n in doc.xpath("//a[@href]")
             if n.attrib['href'].startswith('/search.php?series=')]
    for url in pages:
        content = urlretrieve(url)
        doc = ET.HTML(content)
        chapters += self._list_chapters(doc)
    chapters.sort(lambda a, b: smart_cmp(a['chapter'], b['chapter']))
    return chapters
def list_chapters(self, data):
    url = self.get_series_url(data)
    content = urlretrieve(url)
    doc = ET.HTML(content)
    chapters = self._list_chapters(doc)
    # Follow the pagination links in the page-navigation block, again
    # prepending the base URL to the site-relative hrefs.
    pages = [self.baseurl + n.attrib['href']
             for n in doc.xpath("//div[@class='pagenav']/div/a")
             if n.attrib['href'].startswith('/series/')]
    for url in pages:
        content = urlretrieve(url)
        doc = ET.HTML(content)
        chapters += self._list_chapters(doc)
    chapters.sort(lambda a, b: smart_cmp(a['chapter_label'],
                                         b['chapter_label']))
    return chapters
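# All of the list_chapters() variants above sort with a smart_cmp() helper
# that is not shown in this excerpt.  Judging by its use on chapter labels,
# it is presumably a natural-order comparator, so that "ch. 2" sorts before
# "ch. 10".  A minimal cmp-style sketch under that assumption:

import re

def smart_cmp(a, b):
    # Split each label into digit and non-digit runs, comparing digit runs
    # numerically and everything else case-insensitively.
    def natural_key(s):
        return [int(tok) if tok.isdigit() else tok.lower()
                for tok in re.split(r'(\d+)', s)]
    return cmp(natural_key(a), natural_key(b))

# Under Python 3 the same comparator would be passed to sort() via
# key=functools.cmp_to_key(smart_cmp) instead of as a bare cmp argument.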
def download_chapter(self, data):
    print(data)
    filename = self.get_chapter_filename(data)
    url = data['chapter_url']
    # Fetch the whole chapter in one request, sending the scraper's
    # HTTP headers, and write it straight to disk.
    content = urlretrieve(url, headers=self.http_headers)
    with open(filename, 'wb') as fo:
        fo.write(content)
def download_page(self, data):
    url = self.get_page_url(data)
    f = urlopen(url)
    doc = ET.HTML(f.read())
    # Locate the actual image URL inside the page markup.
    img_url = self._download_page(doc)
    filename = self.get_page_filename(data)
    # Keep the image's own extension, dropping any query debris the
    # host appends after an '&'.
    filename += os.path.splitext(img_url)[-1].split('&')[0].lower()
    content = urlretrieve(img_url)
    with open(filename, 'wb') as fo:
        fo.write(content)
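# A worked example of the extension handling above, with a made-up URL:
# splitext() keeps everything from the last dot on, and split('&') then
# drops the query debris some image hosts append after the extension.
#
#   >>> os.path.splitext('http://img.example.com/p01.JPG&ts=123')[-1]
#   '.JPG&ts=123'
#   >>> '.JPG&ts=123'.split('&')[0].lower()
#   '.jpg'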