Esempio n. 1
0
    def download_story(self, start_chapter=None, stop_chapter=None, exit_on_error=EXIT_ON_ERROR,
                       download_bonus_chapter=DOWNLOAD_BONUS_CHAPTER, chapter_list_ascending=CHAPTER_LIST_ASCENDING):
        """
        Download every chapter of this story that falls inside the requested range.

        :param start_chapter: forwarded to self.get_story_download_range as the range start
        :param stop_chapter: forwarded to self.get_story_download_range as the range end
        :param exit_on_error: stop at the first chapter whose result has a falsy 'status'
        :param download_bonus_chapter: Do not download chapters whose name contains 'Bonus' if False
        :param chapter_list_ascending: if False, self.chapter_list['summary'] is reversed in place
                                       before the download range is computed
        :return: self.story_result (chapter name => chapter result) when a download range was found;
                 the dict returned by self.story_init_check() when it contains the key
                 'Story data is invalid';
                 {'Story data is invalid': 'Failed to get download range'} when no range was found

        Create the story directory
        Story initial check => Return the check result on failure
        Reverse the chapter list when 'chapter_list_ascending' is False
        Get download range (from start_chapter to stop_chapter)
        Import story to mongo
        Loop through story_download_range
            Skip 'Bonus' chapters when 'download_bonus_chapter' is False
            Call self.download_a_chapter and store the result in self.story_result
            Stop downloading if 'exit_on_error' is True and the chapter's status is falsy
        """
        utils.mkdir(self.story_dir)

        # Run the initial check exactly once: the dict that is tested is the
        # same dict that is handed back to the caller.
        init_check = self.story_init_check()
        if 'Story data is invalid' in init_check:
            return init_check

        if not chapter_list_ascending:
            # In-place reversal: this mutates the list stored on the instance.
            self.chapter_list['summary'].reverse()

        story_download_range = self.get_story_download_range(start_chapter, stop_chapter)
        if story_download_range is None:
            # Single-argument parenthesised print emits the same text on
            # Python 2 and is also valid Python 3 syntax.
            print("Story {0} - Failed to get download range : {1}".format(self.story_name, self.story_url))
            return {'Story data is invalid': 'Failed to get download range'}

        self.mongo_instance.import_story(opt_xpath={self.site_url: self.opt_xpath})

        for chapter_name in story_download_range:
            if not download_bonus_chapter and 'Bonus' in chapter_name:
                continue

            chapter_result = self.download_a_chapter(chapter_name=chapter_name, start_page=1)
            self.story_result[chapter_name] = chapter_result

            if exit_on_error and not chapter_result['status']:
                print("%s : Failed to download chapter %s" % (self.story_url, chapter_name))
                break

        return self.story_result
Esempio n. 2
0
    def chapter_download(self, move_chapter_to_img_dir=True):
        """
        Download every page of this chapter, one thread per page, then validate it.

        :param move_chapter_to_img_dir: Move downloaded chapter to image dir
        :return: chapter_result
        :type return: dict()

        Validate page_list: if it is empty or None => Return chapter_result untouched
        Recreate self.chapter_dir (utils.rmdir then utils.mkdir) so the download starts clean
        Create one thread per page (target: Page.page_download) and run them via utils.threading_run
        Validate chapter after downloading
        If the status is truthy and move_chapter_to_img_dir is set:
            store the result of do_move_chapter_to_img_dir() as the new status
            and call import_chapter_to_mongo()
        """
        # Truthiness covers both an empty list and None. The previous
        # `== list()` comparison let None through, which then failed on
        # len(self.page_list) after the chapter directory was already wiped.
        if not self.page_list:
            return self.chapter_result

        utils.rmdir(self.chapter_dir)
        utils.mkdir(self.chapter_dir)
        first_page_number = int(self.start_page)
        # Single-argument parenthesised print emits the same text on
        # Python 2 and is also valid Python 3 syntax.
        print("\nDOWNLOADING Story : {0} - chapter-{1} - site_url {2} - {3} pages".format(
            self.story.story_name, self.chapter_name, self.story.site_url, len(self.page_list)))

        # Enable this for debug only
        # if self.story.site_url == 'blogtr' and self.chapter_name == '2':
        # self.page_list.append("-0S2VlsSH424/UTqPDOXh7HI/AAAAAAAAAZg/P2gifiG1dEo")

        threads = []
        # Pages are numbered consecutively, starting at self.start_page.
        for page_number, page_url in enumerate(self.page_list, first_page_number):
            page = Page(self, page_number=page_number, page_url=page_url)
            # Every page thread receives the same chapter_result dict.
            threads.append(threading.Thread(target=page.page_download, args=(self.chapter_result,)))
        utils.threading_run(threads=threads)

        self.validate_chapter()

        if self.chapter_result['status'] and move_chapter_to_img_dir:
            self.chapter_result['status'] = self.do_move_chapter_to_img_dir()
            self.import_chapter_to_mongo()
        return self.chapter_result