def download_story(self, start_chapter=None, stop_chapter=None, exit_on_error=EXIT_ON_ERROR,
                   download_bonus_chapter=DOWNLOAD_BONUS_CHAPTER,
                   chapter_list_ascending=CHAPTER_LIST_ASCENDING):
    """
    Download the chapters of this story that fall inside the requested range.

    :param start_chapter: first chapter of the range, passed to ``get_story_download_range``
    :param stop_chapter: last chapter of the range, passed to ``get_story_download_range``
    :param exit_on_error: stop at the first chapter whose result has a falsy 'status'
    :param download_bonus_chapter: Do not download 'Bonus' chapters if False
    :param chapter_list_ascending: when False, ``self.chapter_list['summary']`` is
        reversed in place before the range is computed
    :return: ``self.story_result`` (chapter name -> chapter result) when a download
        range was obtained; otherwise a dict containing the key
        'Story data is invalid'

    Steps:
        Create the story directory
        Story initial check => return its result if it reports invalid data
        Reverse the chapter list if it is not ascending
        Get download range (from start_chapter to stop_chapter)
        Import story to mongo
        Loop through the download range
            Skip 'Bonus' chapters unless download_bonus_chapter is set
            Call self.download_a_chapter and store the result in 'story_result'
            Stop on failure if 'exit_on_error' is True and the chapter's status is falsy
    """
    utils.mkdir(self.story_dir)

    # Run the check once and reuse the result, so the dict that is returned
    # is exactly the one that was inspected.
    init_check = self.story_init_check()
    if 'Story data is invalid' in init_check:
        return init_check

    if not chapter_list_ascending:
        # NOTE(review): this mutates self.chapter_list in place, so calling
        # download_story twice with chapter_list_ascending=False would flip the
        # order back - confirm callers only invoke it once per instance.
        self.chapter_list['summary'].reverse()

    story_download_range = self.get_story_download_range(start_chapter, stop_chapter)
    if story_download_range is None:
        print("Story {0} - Failed to get download range : {1}".format(self.story_name, self.story_url))
        return {'Story data is invalid': 'Failed to get download range'}

    self.mongo_instance.import_story(opt_xpath={self.site_url: self.opt_xpath})
    for chapter_name in story_download_range:
        if not download_bonus_chapter and 'Bonus' in chapter_name:
            continue
        chapter_result = self.download_a_chapter(chapter_name=chapter_name, start_page=1)
        self.story_result[chapter_name] = chapter_result
        if exit_on_error and not chapter_result['status']:
            print("%s : Failed to download chapter %s" % (self.story_url, chapter_name))
            break
    return self.story_result
def chapter_download(self, move_chapter_to_img_dir=True):
    """
    Download every page of this chapter, one thread per page, then validate
    and record the result.

    :param move_chapter_to_img_dir: Move downloaded chapter to image dir
    :return: chapter_result
    :type return: dict()

    Steps:
        Validate page_list: if it is None or empty => return the current chapter_result
        Remove self.chapter_dir and recreate it, so all old files are deleted
        Create one thread per page and run them through utils.threading_run
        Validate chapter after downloading
        On success, optionally move the chapter to img_dir and import the result to mongodb
    """
    # Truthiness covers both None and an empty list. The previous
    # `== list()` comparison let None through, which then wiped the chapter
    # directory and failed on len(None).
    if not self.page_list:
        return self.chapter_result

    utils.rmdir(self.chapter_dir)
    utils.mkdir(self.chapter_dir)

    first_page_number = int(self.start_page)
    print("\nDOWNLOADING Story : {0} - chapter-{1} - site_url {2} - {3} pages".format(
        self.story.story_name, self.chapter_name, self.story.site_url, len(self.page_list)))

    # Enable this for debug only
    # if self.story.site_url == 'blogtr' and self.chapter_name == '2':
    #     self.page_list.append("-0S2VlsSH424/UTqPDOXh7HI/AAAAAAAAAZg/P2gifiG1dEo")

    # Pages are numbered consecutively starting at start_page; every page
    # thread receives the same chapter_result dict as its argument.
    threads = []
    for page_number, page_url in enumerate(self.page_list, first_page_number):
        page = Page(self, page_number=page_number, page_url=page_url)
        threads.append(threading.Thread(target=page.page_download, args=(self.chapter_result,)))
    utils.threading_run(threads=threads)

    self.validate_chapter()
    if self.chapter_result['status']:
        if move_chapter_to_img_dir:
            # The move's return value becomes the chapter's final status.
            self.chapter_result['status'] = self.do_move_chapter_to_img_dir()
        # NOTE(review): nesting reconstructed from flattened source - the mongo
        # import is assumed to run only for chapters that passed validation;
        # confirm against the original layout.
        self.import_chapter_to_mongo()
    return self.chapter_result