def test_search_results_has_verse_list(self):
    """
    Check that ``has_verse_list()`` reports True for a SearchResults object built with a populated verse list
    """
    # GIVEN: A SearchResults object constructed from a verse dictionary holding one verse
    verses = {1: 'In the beginning God created the heavens and the earth.'}
    results = SearchResults('Genesis', 1, verses)

    # WHEN: The object is asked whether it has a verse list
    outcome = results.has_verse_list()

    # THEN: The answer should be True
    self.assertTrue(outcome, 'The SearchResults object should have a verse list')
def test_search_results_has_no_verse_list(self):
    """
    Test that a SearchResults object with an empty verse list returns False when checking ``has_verse_list()``
    """
    # GIVEN: A valid SearchResults object with an empty verse list
    search_results = SearchResults('Genesis', 1, {})

    # WHEN: We check that the SearchResults object has a verse list
    has_verse_list = search_results.has_verse_list()

    # THEN: It should be False
    # The failure message describes the expectation being checked here (no verse list); it previously
    # repeated the message of the positive test, which made a failure report misleading.
    self.assertFalse(has_verse_list, 'The SearchResults object should not have a verse list')
def test_search_results_has_no_verse_list(self):
    """
    Test that a SearchResults object with an empty verse list returns False when checking ``has_verse_list()``
    """
    # GIVEN: A valid SearchResults object with an empty verse list
    search_results = SearchResults('Genesis', 1, {})

    # WHEN: We check that the SearchResults object has a verse list
    has_verse_list = search_results.has_verse_list()

    # THEN: It should be False
    # The failure message describes the expectation being checked here (no verse list); it previously
    # repeated the message of the positive test, which made a failure report misleading.
    assert has_verse_list is False, 'The SearchResults object should not have a verse list'
def test_search_results_has_verse_list(self):
    """
    Check that ``has_verse_list()`` is exactly True for a SearchResults object built with a populated verse list
    """
    # GIVEN: A SearchResults object constructed from a verse dictionary holding one verse
    verses = {1: 'In the beginning God created the heavens and the earth.'}
    results = SearchResults('Genesis', 1, verses)

    # WHEN: The object is asked whether it has a verse list
    outcome = results.has_verse_list()

    # THEN: The answer should be the boolean True
    assert outcome is True, 'The SearchResults object should have a verse list'
def test_search_results_creation(self):
    """
    Test that a newly constructed SearchResults object exposes the book, chapter and verse list it was given
    """
    # GIVEN: The book name, chapter number and verse dictionary to build the object from
    expected_book = 'Genesis'
    expected_chapter = 1
    expected_verses = {
        1: 'In the beginning God created the heavens and the earth.',
        2: ('The earth was without form and void, and darkness was over the face of the deep. And the Spirit of '
            'God was hovering over the face of the waters.')
    }

    # WHEN: The SearchResults object is constructed from those values
    results = SearchResults(expected_book, expected_chapter, expected_verses)

    # THEN: The object exists and its attributes match the values passed in
    self.assertIsNotNone(results, 'The search_results object should not be None')
    self.assertEqual(results.book, expected_book, 'The book should be "Genesis"')
    self.assertEqual(results.chapter, expected_chapter, 'The chapter should be 1')
    self.assertDictEqual(results.verse_list, expected_verses, 'The verse lists should be identical')
def get_bible_chapter(self, version, book_name, chapter):
    """
    Access and decode bibles via Bibleserver mobile website

    :param version: The version of the bible like NIV for New International Version
    :param book_name: Text name of bible book e.g. Genesis, 1. John, 1John or Offenbarung
    :param chapter: Chapter number
    :return: A SearchResults object built from the book name, chapter and parsed verses, or None when no soup
        is returned for the chapter URL or the verse container cannot be found in the page
    """
    log.debug('BSExtract.get_bible_chapter("%s", "%s", "%s")', version, book_name, chapter)
    url_version = urllib.parse.quote(version.encode("utf-8"))
    url_book_name = urllib.parse.quote(book_name.encode("utf-8"))
    chapter_url = 'http://m.bibleserver.com/text/%s/%s%d' % (url_version, url_book_name, chapter)
    header = ('Accept-Language', 'en')
    soup = get_soup_for_bible_ref(chapter_url, header)
    if not soup:
        return None
    self.application.process_events()
    content = soup.find('div', 'content')
    # The verse elements are the <div> descendants of the first <div> nested inside the content container.
    # ``find`` returns None when nothing matches, so a missing nested <div> is handled exactly like a
    # missing content container instead of raising AttributeError on ``None.find_all``.
    verse_container = content.find('div') if content else None
    if verse_container is None:
        log.error('No verses found in the Bibleserver response.')
        send_error_message('parse')
        return None
    verses = {}
    for verse in verse_container.find_all('div'):
        self.application.process_events()
        # The verse number is derived from the element's CSS classes: the classes are joined with spaces
        # and VERSE_NUMBER_REGEX replaces each match with its third capture group before the int conversion.
        versenumber = int(VERSE_NUMBER_REGEX.sub(r'\3', ' '.join(verse['class'])))
        # The second child node of the verse element is used as the text, minus trailing newlines.
        verses[versenumber] = verse.contents[1].rstrip('\n')
    return SearchResults(book_name, chapter, verses)
def get_bible_chapter(self, version, book_name, chapter):
    """
    Fetch a chapter from the BibleGateway website and decode it into verses.

    :param version: The version of the Bible like 31 for New International version.
    :param book_name: Name of the Book.
    :param chapter: Chapter number.
    :return: A SearchResults object for the chapter, or None when no soup is returned, the result container
        is missing or no verses could be extracted.
    """
    log.debug('BGExtract.get_bible_chapter("%s", "%s", "%s")', version, book_name, chapter)
    quoted_book = urllib.parse.quote(book_name.encode("utf-8"))
    query = 'search=%s+%s&version=%s' % (quoted_book, chapter, version)
    page_url = 'http://legacy.biblegateway.com/passage/?%s' % query
    soup = get_soup_for_bible_ref(page_url, pre_parse_regex=r'<meta name.*?/>', pre_parse_substitute='')
    if not soup:
        return None
    passage = soup.find('div', 'result-text-style-normal')
    if not passage:
        return None
    self._clean_soup(passage)
    spans = passage.find_all('span', 'text')
    log.debug('Span list: %s', spans)
    # Without any "text" spans the page must be in the old HTML format, so fall back to the old extractor
    verses = self._extract_verses(spans) if spans else self._extract_verses_old(passage)
    if verses:
        return SearchResults(book_name, chapter, verses)
    log.debug('No content found in the BibleGateway response.')
    send_error_message('parse')
    return None
def get_bible_chapter(self, version, book_name, chapter):
    """
    Fetch a chapter from the Crosswalk website and decode it into verses

    :param version: The version of the Bible like niv for New International Version
    :param book_name: Text name of in english e.g. 'gen' for Genesis
    :param chapter: Chapter number
    :return: A SearchResults object for the chapter, or None when no soup is returned or the page contains
        no verse elements
    """
    log.debug('CWExtract.get_bible_chapter("{version}", "{book}", "{chapter}")'.format(
        version=version, book=book_name, chapter=chapter))
    # The book name goes into the URL path: spaces become hyphens, then it is lower-cased and percent-quoted
    book_slug = urllib.parse.quote(book_name.replace(' ', '-').lower().encode("utf-8"))
    chapter_url = 'http://www.biblestudytools.com/{version}/{book}/{chapter}.html'.format(
        version=version, book=book_slug, chapter=chapter)
    soup = get_soup_for_bible_ref(chapter_url)
    if not soup:
        return None
    self.application.process_events()
    verse_divs = soup.find_all('div', 'verse')
    if not verse_divs:
        log.error('No verses found in the CrossWalk response.')
        send_error_message('parse')
        return None
    verses = {}
    for verse_div in verse_divs:
        self.application.process_events()
        number = int(verse_div.find('strong').contents[0])
        text_span = verse_div.find('span', class_='verse-%d' % number)
        # Drop the <a> and <sup> elements so that only the verse text itself remains in the span
        for unwanted in text_span.find_all(['a', 'sup']):
            unwanted.decompose()
        raw_text = text_span.get_text()
        self.application.process_events()
        # Fix up leading and trailing spaces, multiple spaces, and spaces between text and , and .
        trimmed = raw_text.strip('\n\r\t ')
        single_spaced = REDUCE_SPACES_REGEX.sub(' ', trimmed)
        verses[number] = FIX_PUNKCTUATION_REGEX.sub(r'\1', single_spaced)
    return SearchResults(book_name, chapter, verses)
def get_bible_chapter(self, version, book_name, chapter):
    """
    Fetch a chapter from the Bibleserver website and decode it into verses

    :param version: The version of the bible like NIV for New International Version
    :param book_name: Text name of bible book e.g. Genesis, 1. John, 1John or Offenbarung
    :param chapter: Chapter number
    :return: A SearchResults object for the chapter, or None when no soup is returned or the page has no
        chapter article
    """
    log.debug('BSExtract.get_bible_chapter("{version}", "{book}", "{chapter}")'.format(
        version=version, book=book_name, chapter=chapter))
    quoted_version = urllib.parse.quote(version.encode("utf-8"))
    quoted_book = urllib.parse.quote(book_name.encode("utf-8"))
    chapter_url = 'https://bibleserver.com/{version}/{name}{chapter:d}'.format(
        version=quoted_version, name=quoted_book, chapter=chapter)
    soup = get_soup_for_bible_ref(chapter_url)
    if not soup:
        return None
    self.application.process_events()
    if not soup.find('article', 'chapter'):
        log.error('No verses found in the Bibleserver response.')
        send_error_message('parse')
        return None
    # Strip the footnote tooltip spans first, then the noscript tags, before the verse spans are collected
    for footnote in soup.find_all('span', 'footnote-tooltip'):
        footnote.decompose()
    for noscript_tag in soup.find_all('noscript'):
        noscript_tag.decompose()
    verses = {}
    for verse_span in soup.find_all('span', 'verse'):
        self.application.process_events()
        number_text = verse_span.find('span', 'verse-number__group').get_text()
        number = int(number_text.strip())
        verse_text = verse_span.find('span', 'verse-content--hover').get_text()
        verses[number] = verse_text.strip()
    return SearchResults(book_name, chapter, verses)