Esempio n. 1
0
    def get_passage_range(self, book, chapter_from, passage_from, chapter_to,
                          passage_to):
        """
        Retrieves every passage between two passages of a book from the Bible Gateway site

        When both (capped) chapter numbers are equal, the request is delegated to a single ``self.search`` call.
        Otherwise the tail of the first chapter, every whole chapter in between, and the head of the last chapter
        are fetched separately and combined in that order.

        :param book: Name of the book
        :type book: str
        :param chapter_from: Chapter number in which the range starts
        :type chapter_from: int
        :param passage_from: Passage number in the first chapter at which the range starts
        :type passage_from: int
        :param chapter_to: Chapter number in which the range ends
        :type chapter_to: int
        :param passage_to: Passage number in the last chapter at which the range ends
        :type passage_to: int
        :return: All passages between the specified passages (inclusive). Empty string/list if the passage is invalid.
        :rtype: str (list if self.output_as_list is True)
        """
        # The chapter and passage numbers end up inside the site search string, so they are bounded up front
        # to keep that string (and therefore the web request) from growing unreasonably.
        first_chapter = common.get_capped_integer(
            chapter_from,
            max_value=common.get_chapter_count(book, self.translation))
        first_passage = common.get_capped_integer(passage_from)
        last_chapter = common.get_capped_integer(
            chapter_to,
            max_value=common.get_chapter_count(book, self.translation))
        last_passage = common.get_capped_integer(passage_to)

        # A range confined to one chapter can be expressed as a single search query
        if first_chapter == last_chapter:
            query = '{0} {1}:{2} - {3}'.format(book, first_chapter,
                                               first_passage, last_passage)
            return self.search(query)

        # Tail of the first chapter: from the starting passage through to the end of that chapter
        sections = [
            self.get_passages(book, first_chapter, first_passage,
                              common.get_end_of_chapter())
        ]
        # Head of the last chapter: from its first passage up to the ending passage.
        # Fetched before the middle chapters, but appended after them to keep reading order.
        closing_section = self.get_passages(book, last_chapter, 1,
                                            last_passage)
        # Whole chapters strictly between the first and last chapters
        for chapter_number in range(first_chapter + 1, last_chapter):
            sections.append(self.get_chapter(book, chapter_number))
        sections.append(closing_section)

        if not self.output_as_list:
            return '\n'.join(sections)

        # Each section is itself a list of passages, so collapse them into one flat list
        flattened = []
        for section in sections:
            flattened.extend(section)
        return flattened
Esempio n. 2
0
    def get_book(self, book):
        """
        Retrieves an entire book from the Bible Gateway site

        Implemented as a passage range starting at chapter 1, passage 1 and ending at the end-of-chapter marker
        of the last chapter reported by ``common.get_chapter_count`` for the current translation.

        :param book: Name of the book
        :type book: str
        :return: All passages in the specified book. Empty string/list if the passage is invalid.
        :rtype: str (list if self.output_as_list is True)
        """
        final_chapter = common.get_chapter_count(book, self.translation)
        final_passage = common.get_end_of_chapter()
        return self.get_passage_range(book, 1, 1, final_chapter,
                                      final_passage)
    def get_book(self, book, file_path=''):
        """
        Retrieves an entire book from the Bible files

        Implemented as a chapter range from chapter 1 to the last chapter reported by
        ``common.get_chapter_count`` for the current translation.

        :param book: Name of the book
        :type book: str
        :param file_path: When specified, reads the file from this location with a custom filename and extension.
                          Using this parameter will take priority over the default_directory class property.
                          Defaults to the default_directory path with the book as the file name with a default
                          extension.
        :type file_path: str
        :return: All passages in the specified book. Empty string/list if the passage is invalid.
        :rtype: str (list if self.output_as_list is True)
        """
        final_chapter = common.get_chapter_count(book, self.translation)
        return self.get_chapters(book, 1, final_chapter, file_path)
    def download_book(self, book, file_path=''):
        """
        Downloads an entire book of the Bible and saves it as a file

        Implemented as a passage range download starting at chapter 1, passage 1 and ending at the
        end-of-chapter marker of the last chapter reported by ``common.get_chapter_count``.

        :param book: Name of the book
        :type book: str
        :param file_path: When specified, saves the file to this location with a custom filename and extension.
                          Using this parameter will take priority over the default_directory class property.
                          Defaults to the default_directory path with the book as the file name with a default
                          extension.
        :type file_path: str
        :return: 1 if the download was successful. 0 if an error occurred.
        :rtype: int
        """
        final_chapter = common.get_chapter_count(book, self.translation)
        final_passage = common.get_end_of_chapter()
        return self.download_passage_range(book, 1, 1, final_chapter,
                                           final_passage, file_path)
    def download_passage_range(self,
                               book,
                               chapter_from,
                               passage_from,
                               chapter_to,
                               passage_to,
                               file_path=''):
        """
        Downloads a range of passages from one specific passage to another passage as a file

        Each chapter in the (capped) range is fetched through a ``WebExtractor`` and stored in a document keyed by
        the title-cased book name, alongside an ``Info`` entry describing the translation. The document is then
        handed to ``self.file_writing_function`` together with the destination path.

        :param book: Name of the book
        :type book: str
        :param chapter_from: First chapter number to get
        :type chapter_from: int
        :param passage_from: First passage number to get in the first chapter
        :type passage_from: int
        :param chapter_to: Last chapter number to get
        :type chapter_to: int
        :param passage_to: Last passage number to get in the last chapter
        :type passage_to: int
        :param file_path: When specified, saves the file to this location with a custom filename and extension.
                          Using this parameter will take priority over the default_directory class property.
                          Defaults to the default_directory path with the book as the file name with a default
                          extension.
        :type file_path: str
        :return: The result of self.file_writing_function.
                 Documented as 1 if the download was successful, 0 if an error occurred.
        :rtype: int
        :raises UnsupportedTranslationError: If the upper-cased translation is reported as unsupported
        :raises InvalidPassageError: If the book has no chapters in the given translation
        """
        translation = self.translation.upper()
        if common.is_unsupported_translation(translation):
            raise UnsupportedTranslationError(translation)
        # Standardise letter casing with minimal impact to the resulting file
        book_name = book.title()

        # Looked up once and reused for both the validity check and the chapter caps
        chapter_count = common.get_chapter_count(book_name, translation)
        if chapter_count <= 0:
            raise InvalidPassageError(book_name, chapter_from, passage_from,
                                      chapter_to, passage_to, translation)
        # Cap passage components to ensure input validity and minimise web requests by avoiding invalid chapters
        capped_chapter_from = common.get_capped_integer(
            chapter_from, max_value=chapter_count)
        capped_passage_from = common.get_capped_integer(passage_from)
        capped_chapter_to = common.get_capped_integer(
            chapter_to, max_value=chapter_count)
        capped_passage_to = common.get_capped_integer(passage_to)

        # Always request list output from the site so that each chapter can be converted by _get_passages_dict
        online_bible = WebExtractor(
            translation=translation,
            show_passage_numbers=self.show_passage_numbers,
            output_as_list=True,
            strip_excess_whitespace_from_list=self.strip_excess_whitespace,
            use_ascii_punctuation=self.use_ascii_punctuation)

        # Set up the base document with the root-level keys.
        # Upon downloading a file, the top-level keys might be ordered differently to when they were inserted.
        # This does not affect the information contained in the downloaded file, but could affect file comparisons.
        document = {
            'Info': {
                'Language': common.get_translation_language(translation),
                'Translation': translation
            },
            book_name: {}
        }

        # Don't initialise the process pool unless the extractor has been set to use multiprocessing.
        # This logic could be already running in a daemon process, and initialising the pool will cause an error.
        process_pool = None
        if self.enable_multiprocessing:
            process_pool = multiprocessing.Pool()
        process_results = []

        # The end-of-chapter marker does not depend on the chapter, so it is obtained once
        end_of_chapter = common.get_end_of_chapter()
        # Range is extended by 1 to include chapter_to in the loop iteration
        chapter_range = range(capped_chapter_from, capped_chapter_to + 1)
        try:
            for chapter in chapter_range:
                passage_initial = 1
                passage_final = end_of_chapter
                # Exclude a certain first half of the initial chapter based on where the passage start should be
                if chapter == capped_chapter_from:
                    passage_initial = capped_passage_from
                # Exclude a certain last half of the last chapter based on where the passage end should be
                if chapter == capped_chapter_to:
                    passage_final = capped_passage_to

                if process_pool is None:
                    document[book_name][chapter] = self._get_passages_dict(
                        online_bible, book_name, chapter, passage_initial,
                        passage_final)
                else:
                    # Asynchronously obtain each set of passages to reduce overall download time.
                    # The result handle is stored and its value extracted later to prioritise doing more work.
                    process_results.append(process_pool.apply_async(
                        self._get_passages_dict,
                        (online_bible, book_name, chapter, passage_initial,
                         passage_final),
                        error_callback=self.__handle_exception_from_process))
        finally:
            if process_pool is not None:
                # Close the pool manually, as the garbage collector might not dispose of this automatically.
                # Done in a finally clause so the worker processes are released even if scheduling fails.
                process_pool.close()
                # Explicitly wait for the processes to finish up in case some processes have heavy workloads
                process_pool.join()

        if process_pool is not None:
            # Results were appended in chapter order, so they can be paired back up with the chapter numbers.
            # Retrieved as a batch after the pool has finished to minimise the time spent blocking in "get".
            document[book_name] = {
                chapter: result.get()
                for chapter, result in zip(chapter_range, process_results)
            }

        if file_path:
            file_location = file_path
        else:
            # No explicit destination: fall back to "<default_directory>/<Book Name><extension>"
            file_location = os.path.join(
                self.default_directory,
                '{0}{1}'.format(book_name, self.file_extension))
        return self.file_writing_function(file_location, document)