def _parse_one_chapter(self, element, url):
    """Build a ChapterListItem from a single chapter node.

    The node is expected to contain an ``<a>`` child whose text is the
    chapter title, whose ``href`` is the chapter link and whose ``title``
    attribute carries the update time and the word count.

    :param element: node supporting ``xpath`` that wraps the chapter link.
    :param url: base URL used to resolve the chapter ``href``.
    :returns: the populated ``ChapterListItem``.
    :raises IndexError: if the link text, ``href`` or ``title`` is missing.
    :raises ValueError: if the word-count fragment is not an integer.
    """
    entry = ChapterListItem()
    entry['title'] = element.xpath('./a/text()')[0]
    href = element.xpath('./a/@href')[0]
    entry['url'] = urljoin(url, href)

    tooltip = element.xpath('./a/@title')[0]
    # Split once on the last space: the left part holds the update time and
    # the right part holds the word count. For each part only the text after
    # its last ':' is kept.
    # NOTE(review): assumes a "<label>:<time> <label>:<count>" layout -- confirm
    # against the site's markup.
    parts = tooltip.rsplit(' ', 1)
    entry['updated_at'] = parts[0].split(u':')[-1]
    entry['word_count'] = int(parts[-1].split(u':')[-1])
    return entry
def parse_chapter_list(self, content, url):
    """Yield a ChapterListItem for each chapter link in a ``ListRow`` list.

    This is a generator, so parsing only starts on the first iteration.

    :param content: HTML of the chapter-list page.
    :param url: base URL used to resolve each chapter ``href``.
    :returns: generator of ``ChapterListItem`` objects whose
        ``chapter_ordinal`` starts at 1 and only advances for links that
        were parsed successfully.
    :raises ValueError: if ``etree.HTML`` rejects ``content`` with a
        ``ValueError``.
    """
    try:
        tree = etree.HTML(content)
    except ValueError:
        raise ValueError("can't parse any volume")

    # The last two anchors of the list are dropped.
    # NOTE(review): presumably they are not chapters -- confirm against the page.
    anchors = tree.xpath('//ul[@class="ListRow"]/li/a')[:-2]

    next_ordinal = 1
    for anchor in anchors:
        try:
            entry = ChapterListItem()
            href = anchor.xpath('./@href')[0]
            entry['url'] = urljoin(url, href)
            entry['title'] = anchor.xpath('./text()')[0]
            entry['updated_at'] = None
            entry['word_count'] = 0
            entry['chapter_ordinal'] = next_ordinal
            next_ordinal += 1
            yield entry
        except Exception as e:
            # Best effort: a malformed link is logged and skipped.
            self.logger.error(e)
def parse_chapter_list(self, content, url):
    """Yield a ChapterListItem for each chapter under ``div#list``.

    This is a generator, so parsing only starts on the first iteration.
    Each chapter ``href`` is resolved against ``url``: relative links become
    absolute, and links that are already absolute pass through unchanged.

    :param content: HTML of the chapter-list page.
    :param url: URL of the page, used as the base for resolving each
        chapter ``href``.
    :returns: generator of ``ChapterListItem`` objects whose
        ``chapter_ordinal`` starts at 1 and only advances for entries that
        were parsed successfully.
    :raises ValueError: if ``etree.HTML`` rejects ``content`` with a
        ``ValueError``.
    """
    try:
        sel = etree.HTML(content)
    except ValueError:
        raise ValueError("can't parse any volume")

    # The first nine <dd> entries are skipped.
    # NOTE(review): presumably a "latest chapters" teaser that duplicates
    # entries further down -- confirm against the page.
    chapters = sel.xpath('//div[@id="list"]/dl/dd')[9:]
    self.logger.debug(chapters)

    chapter_ordinal = 1
    for chapter in chapters:
        item = ChapterListItem()
        try:
            item['title'] = chapter.xpath('./a/text()')[0]
            # Resolve against the page URL so relative hrefs yield a usable
            # link instead of a bare path.
            item['url'] = urljoin(url, chapter.xpath('./a/@href')[0])
            item['updated_at'] = ''
            item['word_count'] = 0
            item['chapter_ordinal'] = chapter_ordinal
            chapter_ordinal += 1
            yield item
        except Exception as e:
            # Best effort: a malformed entry is logged and skipped.
            self.logger.error(e)
def parse_chapter_list(self, content, url):
    """Yield a ChapterListItem for each entry of the ``catalog-list`` list.

    This is a generator, so parsing only starts on the first iteration.
    Iteration stops at the first ``<li>`` that contains an
    ``<i class="iconfont">`` child.

    :param content: HTML of the chapter-list page.
    :param url: base URL used to resolve each chapter ``href``.
    :returns: generator of ``ChapterListItem`` objects whose
        ``chapter_ordinal`` starts at 1 and only advances for entries that
        were parsed successfully.
    :raises ValueError: if ``etree.HTML`` rejects ``content`` with a
        ``ValueError``.
    """
    try:
        tree = etree.HTML(content)
    except ValueError:
        raise ValueError("can't parse any volume")

    rows = tree.xpath('//ul[@class="catalog-list cl"]/li')

    next_ordinal = 1
    for row in rows:
        marker = row.xpath('./i[@class="iconfont"]')
        if marker:
            # NOTE(review): presumably the icon marks entries that should
            # not be listed (e.g. locked chapters) -- confirm against the page.
            break

        entry = ChapterListItem()
        try:
            entry['title'] = row.xpath('./a/text()')[0]
            href = row.xpath('./a/@href')[0]
            entry['url'] = urljoin(url, href)
            entry['updated_at'] = ''
            entry['word_count'] = 0
            entry['chapter_ordinal'] = next_ordinal
            next_ordinal += 1
            yield entry
        except Exception as e:
            # Best effort: a malformed entry is logged and skipped.
            self.logger.error(e)