def test_parse_modules_folder(self):
    """
    Check that the conf files found in a folder are parsed into module metadata.
    """
    # GIVEN: A SwordModules object pointed at the test resource folder
    sword_modules = SwordModules(TEST_RESOURCE_FOLDER)

    # WHEN: The conf files of the modules are parsed
    metadata = sword_modules.parse_modules()

    # THEN: Every detected module must be one of the known test modules
    known_modules = [u'ChiPinyin', u'FinPR', u'BSV', u'ASV', u'AraNAV', u'SpaRV1909']
    unknown_modules = [name for name in metadata.keys() if name not in known_modules]
    self.assertTrue(not unknown_modules, u'Some expected bibles were not detected')

    # ... and the description of each of these modules must be extractable.
    expected_descriptions = (
        (u'FinPR', u'Finnish Pyhä Raamattu (1933/1938)'),
        (u'BSV', u'The Bond Slave Version Bible'),
        (u'ASV', u'American Standard Version (1901)'),
        (u'AraNAV', u'New Arabic Version (Ketab El Hayat)'),
    )
    for module_key, description in expected_descriptions:
        self.assertEqual(metadata[module_key][u'description'], description,
                         u'Could not extract "description" for "%s"' % module_key)
def generate_dict(source_file, bible_version):
    """
    Build a nested dictionary holding the full text of one SWORD bible module.

    :param source_file: Location handed to ``SwordModules`` to find the module(s).
    :param bible_version: Key of the module to export.
    :return: ``{'books': [...]}``. Each book is ``{'name', 'chapters'}``, each chapter is
        ``{'chapter', 'name', 'verses'}`` and each verse is ``{'verse', 'chapter', 'name', 'text'}``.
        Books are the 'ot' books followed by the 'nt' books of the module structure.
    """
    modules = SwordModules(source_file)
    # The returned metadata is not used here; the call itself is kept ahead of
    # get_bible_from_module() exactly as before.
    modules.parse_modules()
    bible = modules.get_bible_from_module(bible_version)

    structure = bible.get_structure()
    books = structure._books['ot'] + structure._books['nt']

    bib = {'books': []}
    for book in books:
        chapters = []
        # range() instead of xrange(): xrange does not exist on Python 3.
        for chapter in range(1, book.num_chapters + 1):
            verses = []
            for verse in range(1, len(book.get_indicies(chapter)) + 1):
                verses.append({
                    'verse': verse,
                    'chapter': chapter,
                    'name': book.name + " " + str(chapter) + ":" + str(verse),
                    'text': bible.get(books=[book.name], chapters=[chapter], verses=[verse]),
                })
            chapters.append({
                'chapter': chapter,
                'name': book.name + " " + str(chapter),
                'verses': verses,
            })
        bib['books'].append({
            'name': book.name,
            'chapters': chapters,
        })
    return bib
def listModules(self, c, args):
    """
    List the available SWORD modules, optionally restricted to one language.

    :param c: Not used by this method.
    :param args: Argument list. When it holds exactly one item, only modules whose ``lang`` equals that
        item are listed.
    :return: A ``Result`` whose payload is the sorted list of ``[key, lang, about, version]`` rows. The
        category is ``'itemized'`` for a filtered listing, ``'table'`` for an unfiltered one, and
        ``'list'`` (with an empty payload) when ``parse_modules()`` raises ``FileNotFoundError``.
    """
    modules = SwordModules()
    result = []
    try:
        found_modules = modules.parse_modules()
    except FileNotFoundError:
        category = 'list'
    else:
        # The category depends only on the arguments. It used to be assigned inside the loop, which left
        # it unbound (UnboundLocalError) when no modules were found at all.
        filter_by_language = len(args) == 1
        category = "itemized" if filter_by_language else "table"
        for key in found_modules:
            info = found_modules[key]
            row = [
                key,
                info['lang'],
                # '\\par' is the literal backslash + "par" marker; the escaped form avoids the
                # invalid-escape warning of '\par' while matching the same text.
                info['about'].replace('\\par', "\n"),
                info['version'],
            ]
            if not filter_by_language or info['lang'] == args[0]:
                result.append(row)

    result_object = Result()
    result_object.category = category
    result_object.payload = sorted(result)
    return result_object
def test_load_sparv1909_zip(self):
    """
    Load SpaRV1909.zip and compare two passages against their known text.
    """
    # GIVEN: The SpaRV1909.zip file in the test resource folder
    zip_path = os.path.join(TEST_RESOURCE_FOLDER, u'SpaRV1909.zip')
    sword_modules = SwordModules(zip_path)

    # WHEN: The module is parsed and Genesis 3:20 is read
    detected = sword_modules.parse_modules()
    spanish_bible = sword_modules.get_bible_from_module(u'SpaRV1909')
    genesis_text = spanish_bible.get(books=[u'gen'], chapters=[3], verses=[20])

    # THEN: SpaRV1909 is the only module found and the passage equals the known text
    assert u'SpaRV1909' in detected
    assert len(detected) == 1
    print(genesis_text)
    expected_genesis = (u'Y llamó el hombre el nombre de su mujer, Eva; por cuanto ella era madre de todos '
                        u'los vivientes.')
    assert genesis_text == expected_genesis

    # WHEN: A second passage (John 3:17) is read
    john_text = spanish_bible.get(books=[u'john'], chapters=[3], verses=[17])

    # THEN: That passage equals the expected text as well
    expected_john = (u'Porque no envió Dios á su Hijo al mundo para que condene al mundo, mas para '
                     u'que el mundo sea salvo por él.')
    assert john_text == expected_john
def __init__(self, indices):
    """
    Load the first usable bible found among the ``*.zip`` files of the current directory.

    Zip files are tried in the order ``glob.glob`` returns them; the first one that yields at least one
    module wins, and its first module key is loaded.

    :param indices: Stored unchanged on ``self.indices``.
    :raises NoOfflineBibleException: If no zip file in the current directory yields a module.
    """
    # TODO: give user choice
    self.indices = indices
    self.bible = None
    for zip_path in glob.glob("*.zip"):
        modules = SwordModules(zip_path)
        module_keys = list(modules.parse_modules().keys())
        if not module_keys:
            continue
        self.bible = modules.get_bible_from_module(module_keys[0])
        self.zipfile = zip_path
        # NOTE(review): zip content is reportedly only available while the SwordModules object exists,
        # so a reference is kept for the lifetime of this instance - confirm against pysword.
        self._modules = modules
        print("Loaded bible from " + str(zip_path))
        # Stop at the first hit. Previously the loop kept going and the bible was re-requested afterwards
        # from whichever SwordModules object happened to be created last, which could be a zip that does
        # not contain the chosen module key.
        break
    if self.bible is None:
        raise NoOfflineBibleException("No bibles found")
def __init__(self, library_path):
    """
    Load every SWORD module found in the ``.zip`` files of a library folder.

    Each discovered module is wrapped in a ``Bible`` and stored in ``self.modules`` under its module key.
    Modules whose ``feature`` entry equals ``"NoParagraphs"`` get ``use_paragraphs`` set to ``False``.

    :param library_path: Folder that is scanned (non-recursively) for ``.zip`` files.
    """
    self.library_path = library_path
    self.modules = {}
    for filename in os.listdir(self.library_path):
        if not filename.endswith(".zip"):
            continue
        # Errors raised while opening a zip propagate to the caller, exactly as the former
        # ``except: raise`` did (its ``continue`` was unreachable).
        # DEBUG - once the issues are sorted out, unreadable zips are meant to be skipped instead.
        zip_modules = SwordModules(os.path.join(self.library_path, filename))
        discovered = zip_modules.parse_modules()
        for key, config in discovered.items():
            self.modules[key] = Bible(key, zip_modules.get_bible_from_module(key))
            # .get(): a module without a "feature" entry must not abort the whole library load.
            if config.get("feature") == "NoParagraphs":
                self.modules[key].use_paragraphs = False
def test_load_asv_zip(self):
    """
    Load ASV.zip and compare Genesis 3:20 against its known text.
    """
    # GIVEN: The ASV.zip file in the test resource folder
    zip_path = os.path.join(TEST_RESOURCE_FOLDER, u'ASV.zip')
    sword_modules = SwordModules(zip_path)

    # WHEN: The module is parsed and a passage is read
    detected = sword_modules.parse_modules()
    asv_bible = sword_modules.get_bible_from_module(u'ASV')
    passage = asv_bible.get(books=[u'gen'], chapters=[3], verses=[20])

    # THEN: ASV is the only module found and the passage equals the known text
    assert u'ASV' in detected
    assert len(detected) == 1
    expected = u'And the man called his wife’s name Eve; because she was the mother of all living.'
    assert passage == expected
def test_load_bsv_zip(self):
    """
    Load bsv.zip and compare John 3:16 against its known text.
    """
    # GIVEN: The bsv.zip file in the test resource folder
    zip_path = os.path.join(TEST_RESOURCE_FOLDER, u'bsv.zip')
    sword_modules = SwordModules(zip_path)

    # WHEN: The module is parsed and a passage is read
    detected = sword_modules.parse_modules()
    bsv_bible = sword_modules.get_bible_from_module(u'BSV')
    passage = bsv_bible.get(books=[u'john'], chapters=[3], verses=[16])

    # THEN: BSV is the only module found and the passage equals the known text
    assert u'BSV' in detected
    assert len(detected) == 1
    expected = (u'For God so loved the world, that he gave his only begotten Son, that whoever believes in '
                u'him should not perish, but have everlasting life.')
    assert passage == expected
def test_load_finpr_zip(self):
    """
    Load FinPR.zip and compare John 3:16 against its known text.
    """
    # GIVEN: The FinPR.zip file in the test resource folder
    zip_path = os.path.join(TEST_RESOURCE_FOLDER, u'FinPR.zip')
    sword_modules = SwordModules(zip_path)

    # WHEN: The module is parsed and a passage is read
    detected = sword_modules.parse_modules()
    finnish_bible = sword_modules.get_bible_from_module(u'FinPR')
    passage = finnish_bible.get(books=[u'john'], chapters=[3], verses=[16])

    # THEN: FinPR is the only module found and the passage equals the known text
    assert u'FinPR' in detected
    assert len(detected) == 1
    expected = (u'Sillä niin on Jumala maailmaa rakastanut, että hän antoi ainokaisen Poikansa, '
                u'ettei yksikään, joka häneen uskoo, hukkuisi, vaan hänellä olisi iankaikkinen elämä.')
    assert passage == expected
def test_load_chipinyin_zip(self):
    """
    Load chipinyin.zip and compare John 3:16 against its known text.
    """
    # GIVEN: The chipinyin.zip file in the test resource folder
    zip_path = os.path.join(TEST_RESOURCE_FOLDER, u'chipinyin.zip')
    sword_modules = SwordModules(zip_path)

    # WHEN: The module is parsed and a passage is read
    detected = sword_modules.parse_modules()
    pinyin_bible = sword_modules.get_bible_from_module(u'ChiPinyin')
    passage = pinyin_bible.get(books=[u'john'], chapters=[3], verses=[16])

    # THEN: ChiPinyin is the only module found and the passage equals the known text
    assert u'ChiPinyin' in detected
    assert len(detected) == 1
    expected = (u' Shén aì shìrén , shènzhì jiāng tāde dú shēng zǐ cìgĕi tāmen , jiào yīqiè xìn tāde , bú '
                u'zhì mièwáng , fǎn dé yǒngshēng . ')
    assert passage == expected
def test_parse_modules_zip(self):
    """
    Check that the conf file inside a zip archive is parsed into module metadata.
    """
    # GIVEN: A SwordModules object pointed at the FinPR.zip archive
    archive_path = os.path.join(TEST_RESOURCE_FOLDER, u'FinPR.zip')
    sword_modules = SwordModules(archive_path)

    # WHEN: The conf files of the modules are parsed
    metadata = sword_modules.parse_modules()

    # THEN: Exactly one module, FinPR, is detected and its description is extractable
    detected_keys = metadata.keys()
    self.assertEqual(1, len(detected_keys), u'There should be only 1 module in a zip.')
    self.assertIn(u'FinPR', detected_keys, u'FinPR should be available')
    finpr_description = metadata['FinPR']['description']
    self.assertEqual(finpr_description, u'Finnish Pyhä Raamattu (1933/1938)',
                     u'Could not extract "description" for "FinPR"')
def test_load_aranav_zip(self):
    """
    Load AraNAV.zip and compare Revelation 22:19 against its known text.
    """
    # GIVEN: The AraNAV.zip file in the test resource folder
    zip_path = os.path.join(TEST_RESOURCE_FOLDER, u'AraNAV.zip')
    sword_modules = SwordModules(zip_path)

    # WHEN: The module is parsed and a passage is read
    detected = sword_modules.parse_modules()
    arabic_bible = sword_modules.get_bible_from_module(u'AraNAV')
    passage = arabic_bible.get(books=[u'rev'], chapters=[22], verses=[19])

    # THEN: AraNAV is the only module found
    assert u'AraNAV' in detected
    assert len(detected) == 1
    # The passage is compared with surrounding whitespace stripped on both sides.
    expected = u' وَلْتَكُنْ نِعْمَةُ رَبِّنَا يَسُوعَ الْمَسِيحِ مَعَكُمْ جَمِيعاً.'.strip()
    assert passage.strip() == expected
def _import_bible_format():
    """
    Build the mapping from format constant to format handler object.

    The SWORD handler is only registered when ``pysword`` can be imported and setting it up does not
    raise ``FileNotFoundError``; the other handlers are always registered.

    :return: Dictionary keyed by the ``FORMAT_*`` constants.
    """
    formats = {}
    formats[FORMAT_MYBIBLE] = MyBibleFormat()
    formats[FORMAT_MYSWORD] = MySwordFormat()

    try:
        from pysword.modules import SwordModules
        sword_modules = SwordModules()
        sword_metadata = sword_modules.parse_modules()
        formats[FORMAT_SWORD] = SwordFormat(sword_modules, sword_metadata)
    except (ImportError, FileNotFoundError):
        # SWORD support is optional: leave the format unregistered.
        pass

    formats[FORMAT_ZEFANIA] = ZefaniaFormat()
    return formats
def test_get_bible_from_module(self, mocked_sword_bible):
    """
    Check the arguments used to create a bible when the module config only has a path and a driver.
    """
    # GIVEN: A SwordModules object with a hand-made module entry
    sword_modules = SwordModules(u'test_sword_path')
    module_config = {u'datapath': u'test_path', u'moddrv': u'test_mod_type'}
    sword_modules._modules = {u'test_key': module_config}

    # WHEN: A bible is requested for that entry
    bible = sword_modules.get_bible_from_module(u'test_key')

    # THEN: A bible is returned and the mocked bible was called with the expected values
    self.assertIsNotNone(bible, u'Returned bible should not be None')
    expected_path = os.path.join(u'test_sword_path', u'test_path')
    mocked_sword_bible.assert_called_with(expected_path, u'test_mod_type', u'kjv', None, None,
                                          BlockType.BOOK, CompressType.ZIP, None)
def listLanguages(self, c, a):
    """
    List the distinct ``lang`` values of all available SWORD modules.

    :param c: Not used by this method.
    :param a: Not used by this method.
    :return: A ``Result`` of category ``"list"`` whose payload is the sorted list of languages, or a
        ``Result`` of category ``"error"`` when ``parse_modules()`` raises ``FileNotFoundError``.
    """
    result_object = Result()
    sword_modules = SwordModules()
    try:
        found_modules = sword_modules.parse_modules()
    except FileNotFoundError:
        result_object.category = "error"
        result_object.error = "no sword modules could be found!"
        return result_object

    # A set removes duplicates; sorting it yields the same list as the de-duplicated append loop.
    languages = {found_modules[module_key]['lang'] for module_key in found_modules}
    result_object.category = "list"
    result_object.payload = sorted(languages)
    return result_object
def generate_sqlite(source, bible_version):
    """
    Export one SWORD bible module into the SQLite file ``<bible_version>.sqlite``.

    Two tables are (re)filled: ``key_<bible_version>`` with ``(book, name)`` rows and ``<bible_version>``
    with ``(book, chapter, verse, text)`` rows. Book numbers start at 1 and follow the 'ot' books and then
    the 'nt' books of the module structure.

    :param source: Location handed to ``SwordModules`` to find the module(s).
    :param bible_version: Key of the module to export; also used for the file and table names.
    """
    # NOTE: table names cannot be bound as SQL parameters, so bible_version is concatenated into the
    # statements. It must therefore come from a trusted source.
    key_table = "key_" + bible_version
    db = sqlite3.connect(bible_version + ".sqlite", isolation_level=None)
    try:
        cursor = db.cursor()
        # Create the tables if they do not exist yet
        cursor.execute("CREATE TABLE IF NOT EXISTS " + key_table + " (book Int,name varchar);")
        cursor.execute("CREATE TABLE IF NOT EXISTS " + bible_version +
                       " (book Int,chapter Int,verse Int,text varchar);")
        query_key = "INSERT INTO " + key_table + " values (?,?)"
        query_verse = "INSERT INTO " + bible_version + " values (?,?,?,?)"
        # Empty both tables (the tables themselves are kept) so a re-run does not duplicate rows
        cursor.execute("DELETE FROM " + key_table)
        cursor.execute("DELETE FROM " + bible_version)

        # Get books data from module
        modules = SwordModules(source)
        modules.parse_modules()
        bible = modules.get_bible_from_module(bible_version)
        structure = bible.get_structure()
        books = structure._books['ot'] + structure._books['nt']

        for idx, book in enumerate(books, start=1):
            loadingBar(idx, len(books), 2)
            cursor.execute(query_key, (idx, book.name))
            # range() instead of xrange(): xrange does not exist on Python 3.
            for chapter in range(1, book.num_chapters + 1):
                for verse in range(1, len(book.get_indicies(chapter)) + 1):
                    text = bible.get(books=[book.name], chapters=[chapter], verses=[verse])
                    cursor.execute(query_verse, (idx, chapter, verse, text))
    finally:
        # Always release the database file, also when the export fails half-way.
        db.close()
    print("Yay!!")
def __init__(self, left, right, modules_dir="Modules/", hyphen_lang="en_GB"):
    """
    Load two SWORD bible modules and the list of books of each.

    Each module is read from ``modules_dir + <name> + '.zip'``.

    :param left: Module key (and zip base name) of the first bible; loaded into ``bible1``/``books1``.
    :param right: Module key (and zip base name) of the second bible; loaded into ``bible2``/``books2``.
    :param modules_dir: Prefix prepended to the zip file names; the value is concatenated as-is, so a
        directory needs its trailing separator.
    :param hyphen_lang: Language passed to ``pyphen.Pyphen`` for ``self.hyphenator2``.
    """
    self.left = left
    self.right = right

    def load(module_name):
        # Open one zip and return (SwordModules object, bible, 'ot' books + 'nt' books).
        modules = SwordModules(modules_dir + module_name + '.zip')
        modules.parse_modules()
        bible = modules.get_bible_from_module(module_name)
        testaments = bible.get_structure().get_books()
        # .get(..., []) for both bibles: a module with only one testament used to raise KeyError when
        # it was passed as the right-hand bible, although it was accepted as the left-hand one.
        return modules, bible, testaments.get('ot', []) + testaments.get('nt', [])

    # NOTE(review): zip content is reportedly only available while the SwordModules object exists, so
    # both objects stay referenced instead of the first being dropped - confirm against pysword.
    self._modules1, self.bible1, self.books1 = load(left)
    self._modules2, self.bible2, self.books2 = load(right)

    self.uuid = str(uuid.uuid4())
    self.hyphenator2 = pyphen.Pyphen(lang=hyphen_lang)
def get_bible_json(path, overwrite):
    """
    Convert the first SWORD module found at ``path`` into a JSON-ready dictionary.

    NOTE(review): SOURCE had lost its indentation; the nesting below was reconstructed from the
    statement order and should be checked against the original file.

    :param path: Location handed to ``SwordModules``.
    :param overwrite: When false and ``does_bible_json_exist`` reports an existing file, nothing is done.
    :return: ``None`` when the version is skipped or when pysword fails with an 'incorrect header' error
        on a verse that is in neither ``omissible_verses`` nor ``permissible_eoc_differences``; otherwise
        a dictionary with ``version``, ``omittedVerses``, ``endOfChapterDifferences``, ``verseCount``,
        ``passedExtraVerseChecks`` and ``books``.
    :raises Exception: When a verse has no text and is in neither of those two lists; any pysword error
        other than 'incorrect header' is re-raised.
    """
    modules = SwordModules(path)
    found_modules = modules.parse_modules()
    keys = found_modules.keys()
    # Only the first module key is processed; validate.keys() receives the total number found.
    version = list(keys)[0]
    validate = Validate(version)
    report = Report(version)
    validate.keys(len(keys))
    comparator = get_comparator(version)
    exists_obj = does_bible_json_exist(version)
    if exists_obj['exists'] and not overwrite:
        print(f'{version} - skipping')
        return None
    bible = modules.get_bible_from_module(version)
    # noinspection PyProtectedMember
    raw_books = bible.get_structure()._books['ot'] + bible.get_structure()._books['nt']
    validate.books(len(raw_books))
    print('==================================================')
    print(f'{version} - processing in progress, please wait')
    chapter_count = 0
    all_verses = []
    omitted_verses = []
    actual_eoc_differences = []
    books = []
    start = time.time()
    for book_idx, book in enumerate(raw_books):
        # TODO: Add more checks based on book info
        # pprint(vars(book))
        report.processed(book_idx + 1, book.osis_name, start)
        range_chapters = range(1, book.num_chapters + 1)
        chapters = []
        for chapter in range_chapters:
            raw_verses = book.get_indicies(chapter)
            verses = []
            # Only the position is used; verse numbers are 1-based (verseIdx + 1).
            for verseIdx, verse in enumerate(raw_verses):
                verse_ref = book.osis_name + ' ' + str(chapter) + ':' + str(verseIdx + 1)
                try:
                    text = bible.get(books=[book.name], chapters=[chapter], verses=[verseIdx + 1])
                except Exception as e:
                    # An 'incorrect header' error is tolerated only for verses that are expected to be
                    # missing; for any other verse the whole conversion is aborted.
                    if 'incorrect header' in str(e):
                        if verse_ref in omissible_verses or verse_ref in permissible_eoc_differences:
                            text = None
                        else:
                            print(f'{version} - aborting with error on {verse_ref}')
                            print(f'{version} - {str(e)}')
                            return None
                    else:
                        raise e
                # if verse_ref == 'Romans 16:25':
                #     print(f'{verse_ref} = "{text}"')
                # Whitespace-only text is treated like missing text.
                if text is not None:
                    text = text.strip()
                    if text == '':
                        text = None
                if text is None:
                    if verse_ref in omissible_verses:
                        omitted_verses.append(verse_ref)
                    elif verse_ref in permissible_eoc_differences:
                        pass
                    else:
                        raise Exception(f'{version} - text is none for {verse_ref}')
                else:
                    if comparator is not None:
                        text = comparator.compare(text)
                    # NOTE(review): assumed to belong to this else-branch, so that verses without text
                    # are not recorded - confirm against the original indentation.
                    verse = {
                        'verse': verseIdx + 1,
                        'chapter': chapter,
                        'ref': verse_ref,
                        'text': text
                    }
                    verses.append(verse)
                    all_verses.append(verse)
            # TODO: Add testament
            chapters.append({
                'chapter': chapter,
                'name': book.osis_name + ' ' + str(chapter),
                'verses': verses
            })
        # NOTE(review): assumed to run once per book, after its chapter loop - confirm.
        chapter_count += len(chapters)
        books.append({
            'name': book.osis_name,
            'chapters': chapters
        })
    # A permissible end-of-chapter difference counts as "actual" when no recorded verse has that ref.
    for eoc_difference in permissible_eoc_differences:
        eoc_verse = next((x for x in all_verses if x['ref'] == eoc_difference), None)
        if eoc_verse is None:
            actual_eoc_differences.append(eoc_difference)
    report.summary(len(books), chapter_count, len(all_verses))
    report.omitted(omitted_verses)
    report.eoc(actual_eoc_differences)
    validate.chapters(chapter_count)
    passed_extra_verse_checks = validate.verses(len(all_verses), actual_eoc_differences, omitted_verses)
    return {
        'version': version,
        'omittedVerses': omitted_verses,
        'endOfChapterDifferences': actual_eoc_differences,
        'verseCount': len(all_verses),
        'passedExtraVerseChecks': passed_extra_verse_checks,
        'books': books
    }
def get_bible_dict(source_file, bible_version, output_path, current_counter, next_counter):
    """
    Convert one SWORD bible module into a dictionary and write per-chapter and per-book JSON files.

    For every chapter that contains text, ``<output_path>/<abbreviation>/<book_nr>/<chapter>.json`` is
    written; for every book that contains text, ``<output_path>/<abbreviation>/<book_nr>.json`` is
    written. A progress block (``XXX`` / counter / message / ``XXX``) is printed after each file.

    NOTE(review): SOURCE had lost its indentation; the nesting below was reconstructed from the
    statement order and should be checked against the original file.

    :param source_file: Location handed to ``SwordModules``.
    :param bible_version: Key of the module to convert.
    :param output_path: Root folder for the generated JSON files.
    :param current_counter: Start value of the progress counter (converted with ``int``).
    :param next_counter: Second counter value handed to ``increment_counter`` (converted with ``int``).
    :return: Dictionary with the translation metadata, the ``books`` list and the ``distribution_*``
        entries taken from the module configuration.
    """
    # set some counter values (the local is not called ``next`` to avoid shadowing the builtin)
    counter = int(current_counter)
    current = int(current_counter)
    next_value = int(next_counter)
    # load the sword module
    module = SwordModules(source_file)
    # get the config
    module_config = module.parse_modules()[bible_version]
    # load the bible version
    bible_mod = module.get_bible_from_module(bible_version)
    # load the list of books per/testament
    testaments = bible_mod.get_structure()._books
    # merge the books
    books = []
    for testament in testaments:
        books += testaments[testament]
    # set the abbreviation
    abbreviation = v1_translation_names.get(bible_version, bible_version.lower())
    # get v1 Book Names (some are in the same language)
    v1_book_names = {}
    # check if this translations was in v1
    if bible_version in v1_translation_names:
        try:
            # NOTE(review): this request has no timeout and only ValueError is handled.
            v1_book_names = requests.get('https://getbible.net/v1/' + abbreviation + '/books.json').json()
        except ValueError:
            # no json found
            v1_book_names = {}
    # start to build the complete scripture of the translation
    bible_ = {}
    bible_['translation'] = v1_translations.get(
        abbreviation, module_config.get('description', bible_version))
    bible_['abbreviation'] = abbreviation
    # the misspelled key is part of the produced data and is kept for compatibility
    bible_['discription'] = module_config.get('description', '')
    # set language
    lang_ = module_config.get('lang', '')
    bible_['lang'] = lang_correction.get(lang_, lang_)
    bible_['language'] = language_names.get(lang_, '')
    bible_['direction'] = text_direction.get(lang_, 'LTR')
    # not sure if this is relevant seeing that json.dump ensure_ascii=True
    bible_['encoding'] = module_config.get('encoding', '')
    # set global book
    bible_book = {
        'translation': bible_.get('translation'),
        'abbreviation': abbreviation,
        'lang': bible_.get('lang'),
        'language': bible_.get('language'),
        'direction': bible_.get('direction', 'LTR'),
        'encoding': bible_.get('encoding')
    }
    # set global chapter
    bible_chapter = {
        'translation': bible_.get('translation'),
        'abbreviation': abbreviation,
        'lang': bible_.get('lang'),
        'language': bible_.get('language'),
        'direction': bible_.get('direction', 'LTR'),
        'encoding': bible_.get('encoding')
    }
    # start building the books
    bible_['books'] = []
    for book in books:
        # add the book only if it has verses
        book_has_verses = False
        # reset chapter bucket
        chapters = []
        # set book number
        book_nr = book_numbers.get(book.name)
        # get book name as set in v1
        book_name = v1_book_names.get(str(book_nr), {}).get(
            'name', book_names.get(book.name, book.name))
        # get book path
        book_path = os.path.join(output_path, bible_.get('abbreviation'), str(book_nr))
        # check if path is set
        check_path(book_path)
        # range() instead of xrange(): xrange does not exist on Python 3.
        for chapter in range(1, book.num_chapters + 1):
            # add the chapter only if it has verses; the flag is reset for every chapter, otherwise
            # every chapter after the first non-empty one would be written even when it is empty
            chapter_has_verses = False
            # reset verse bucket
            verses = []
            for verse in range(1, len(book.get_indicies(chapter)) + 1):
                text = bible_mod.get(books=[book.name], chapters=[chapter], verses=[verse])
                _text = text.replace('[]', '')
                # ''.isspace() is False, so the emptiness check must look at the cleaned text;
                # otherwise a verse consisting only of '[]' is stored as if it had content
                if _text and not _text.isspace():
                    book_has_verses = True
                    chapter_has_verses = True
                    verses.append({
                        'chapter': chapter,
                        'verse': verse,
                        'name': book_name + " " + str(chapter) + ":" + str(verse),
                        'text': text
                    })
            if chapter_has_verses:
                # load to complete Bible
                chapters.append({
                    'chapter': chapter,
                    'name': book_name + " " + str(chapter),
                    'verses': verses
                })
                # set chapter
                bible_chapter['book_nr'] = book_nr
                bible_chapter['book_name'] = book_name
                bible_chapter['chapter'] = chapter
                bible_chapter['name'] = book_name + " " + str(chapter)
                bible_chapter['verses'] = verses
                # store to chapter file
                write_json(bible_chapter, os.path.join(book_path, str(chapter) + '.json'))
                print('XXX\n{}\nChapter {} was added to {}-{}\nXXX'.format(
                    counter, chapter, book_name, abbreviation))
                counter = increment_counter(counter, next_value, current)
        if book_has_verses:
            # load to complete Bible
            bible_['books'].append({
                'nr': book_nr,
                'name': book_name,
                'chapters': chapters
            })
            # set book
            bible_book['nr'] = book_nr
            bible_book['name'] = book_name
            bible_book['chapters'] = chapters
            # store to book file
            write_json(bible_book, book_path + '.json')
            print('XXX\n{}\nBook ({}) was added to {}\nXXX'.format(
                counter, book_name, abbreviation))
            counter = increment_counter(counter, next_value, current)
    # add distribution info
    bible_['distribution_lcsh'] = module_config.get('lcsh', '')
    bible_['distribution_version'] = module_config.get('version', '')
    bible_['distribution_version_date'] = module_config.get(
        'SwordVersionDate', module_config.get('swordversiondate', ''))
    bible_['distribution_abbreviation'] = module_config.get(
        'abbreviation', abbreviation)
    bible_['distribution_about'] = module_config.get('about', '')
    bible_['distribution_license'] = module_config.get('distributionlicense', '')
    bible_['distribution_sourcetype'] = module_config.get('sourcetype', '')
    bible_['distribution_source'] = module_config.get('textsource', '')
    bible_['distribution_versification'] = module_config.get(
        'versification', '')
    # load the distribution history
    bible_['distribution_history'] = {}
    for k, v in module_config.items():
        if 'history' in k:
            bible_['distribution_history'][k] = v
    return bible_
def word(self, command, args):
    """
    Look up a chapter, a verse range or a single verse in the current SWORD module.

    NOTE(review): SOURCE had lost its indentation; the nesting below was reconstructed from the
    statement order and should be checked against the original file.

    :param command: Command text; the arguments are appended to it to form the first payload entry.
    :param args: ``[book, chapter]`` for a whole chapter, ``[book, chapter, 'min-max']`` for a verse
        range or ``[book, chapter, verse]`` for a single verse.
    :return: A ``Result`` of category ``"text"``. On success the payload is a list starting with the
        full command; on failure ``error`` holds a message.
    """
    result = None
    result_object = Result()
    modules = SwordModules()
    try:
        found_modules = modules.parse_modules()
    except FileNotFoundError:
        result_object.error = 'no sword modules found on this computer. please install some!'
    else:
        try:
            bible = modules.get_bible_from_module(self.current_module)
            try:
                book = args[0]
                # Only ``books_de`` is bound by this import; the local ``modules`` above is untouched.
                import modules.sword.book_names.books_de as books_de
                # Map the book name through books_de.books when it is listed there.
                if book in books_de.books:
                    book = books_de.books[book]
                if len(args) == 2:
                    # Whole chapter: split the joined text and number the lines from 1.
                    result = bible.get(books=[book], chapters=[int(args[1])], clean=True, join='#|#')
                    splitted = result.split('#|#')
                    result = []
                    for i, line in enumerate(splitted):
                        result.append([i + 1, line.strip()])
                elif args[2].find('-') > -1:
                    # Verse range "min-max": lines are numbered starting at the lower bound.
                    verse_min, verse_max = args[2].split('-')
                    verse_range = range(int(verse_min), int(verse_max) + 1)
                    try:
                        result = bible.get(books=[book], chapters=[int(args[1])], verses=verse_range, clean=True, join='#|#')
                    except IndexError:
                        result_object.error = 'invalid verse range'
                    else:
                        splitted = result.split('#|#')
                        result = []
                        for i, line in enumerate(splitted):
                            result.append(
                                [i + int(verse_min), line.strip()])
                else:
                    # Single verse: the verse number is passed as a plain int.
                    verse_range = int(args[2])
                    result = bible.get(books=[book], chapters=[int(args[1])], verses=verse_range, clean=True, join='\n')
            except ValueError as e:
                result_object.error = str(e)
            except KeyError as e:
                result_object.error = 'book not found in current bible: ' + str(
                    book) + "\n\n" + str(e)
            except IndexError as e:
                # Also reached when fewer than two arguments were given (args[2] does not exist).
                result_object.error = 'invalid input. please have a look at the man-page' + "\n\n" + str(
                    e)
        except KeyError:
            result_object.error = 'current module does not exist: ' + self.current_module
        except ValueError as e:
            result_object.error = str(e)
    result_object.category = "text"
    if result:
        # Rebuild the full command line and put it in front of the result lines.
        for item in args:
            command += ' ' + str(item)
        if type(result) == list:
            result.insert(0, command)
        elif type(result) == str:
            result = [result]
            result.insert(0, command)
    # NOTE(review): assumed to run unconditionally, so the payload is None when nothing was found.
    result_object.payload = result
    return result_object
from pysword.modules import SwordModules

# Open the zip one directory up and read its module configuration.
sword = SwordModules("../ESV2011.zip")
available = sword.parse_modules()
esv = sword.get_bible_from_module("ESV2011")

# Print John 1:1-2 without cleaning the text (clean=False).
first_two_verses = range(1, 3)
passage = esv.get(books=["john"], chapters=[1], verses=first_two_verses, clean=False)
print(passage)
from pysword.modules import SwordModules import re # Load module in zip # NB: the zip content is only available as long as the SwordModules object exists modules = SwordModules('KJV.zip') # In this case the module found is: # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}} found_modules = modules.parse_modules() bible = modules.get_bible_from_module('KJV') # Get John chapter 3 verse 16 #output = bible.get(books=['john'], chapters=[3], verses=[16], clean=False) #print bible.get_structure() #print output f = open('text.txt', 'w') n = 0 contents = u'' for item in bible.get_iter(clean=False): sentens = re.findall( r'<w lemma="(.+?)"( morph="(.+?)")?( src=".+?")?>(.+?)</w>(.)', item) for word in sentens: strongs = re.findall(r'strong:((G|H)\d+)', word[0]) morphs = re.findall(r'(robinson|strongMorph):(\S+)', word[2]) ww = word[4] comma = word[5] contents = contents + ww comp1 = zip(strongs, morphs) for strong, morph in comp1:
def get_bible_json(path, overwrite):
    """
    Convert the single SWORD module found at ``path`` into a JSON-ready dictionary.

    NOTE(review): SOURCE had lost its indentation; the nesting below was reconstructed from the
    statement order and should be checked against the original file.

    :param path: Location handed to ``SwordModules``; exactly one module is expected there.
    :param overwrite: When false and ``does_bible_json_exist`` reports an existing file, nothing is done.
    :return: ``None`` when the version is skipped, when pysword cannot open the bible or when a
        testament is missing; otherwise a dictionary with ``version``, ``versionName``, ``meta`` and
        ``books``.
    :raises AssertionError: When one of the sanity checks (one module, encoding, 66 books, 1189 chapters)
        fails.
    """
    # load sword module
    modules = SwordModules(path)
    found_modules = modules.parse_modules()
    keys = found_modules.keys()
    assert len(keys) == 1
    sword_version = list(keys)[0]
    version = lookup_version_abbr(sword_version)
    module = found_modules[sword_version]
    report = Report(version)
    print('==================================================')
    print(f'{version} - processing in progress, please wait')
    # pprint(module)

    # get metadata
    language = module['lang']
    meta = {
        # description and license are dropped when they mention 'Strong'
        'description': module['description'] if module.get('description') and 'Strong' not in module['description'] else None,
        'source': 'sword',
        'swordVersion': module.get('version'),
        'swordVersionDate': module.get('swordversiondate'),
        'encoding': module['encoding'].lower() if module.get('encoding') else None,
        'language': language,
        'license': module['distributionlicense'] if module.get('distributionlicense') and 'Strong' not in module['distributionlicense'] else None,
        'copyright': module.get('copyright') or module.get('shortcopyright')
    }
    actual_encoding = meta['encoding']
    assert actual_encoding == default_encoding or actual_encoding is None, f'{version} - expected module encoding {default_encoding} but got {actual_encoding}'

    # skip if JSON exists
    exists_obj = does_bible_json_exist(version, language)
    if exists_obj['exists'] and not overwrite:
        print(f'{version} - skipping, already exists')
        return None

    # get raw bible books
    # noinspection PyBroadException
    try:
        bible = modules.get_bible_from_module(sword_version)
    except Exception as e:
        print(f'{version} - aborting, pysword failure .. {e}')
        return None
    assert bible._encoding == default_encoding or bible._encoding is None, f'{version} - expected bible encoding {default_encoding} but got {bible._encoding}'
    bible_structure = bible.get_structure()
    assert bible_structure._book_offsets is None
    if bible_structure._books.get('ot') is None:
        print(f'{version} - aborting, old testament missing')
        return None
    if bible_structure._books.get('nt') is None:
        print(f'{version} - aborting, new testament missing')
        return None
    raw_books = bible_structure._books['ot'] + bible_structure._books['nt']
    assert len(raw_books) == 66

    # init processing
    start = time.time()
    chapter_count = 0
    verse_count = 0

    # main processing
    books = []
    for book_idx, book in enumerate(raw_books):
        report.processed(book_idx + 1, book.osis_name, start)
        chapters = []
        for chapter_idx in range(0, book.num_chapters):
            chapter_num = chapter_idx + 1
            raw_verses = book.get_indicies(chapter_num)
            verses = []
            # Only the position is used; verse numbers are 1-based.
            for verse_idx, _ in enumerate(raw_verses):
                verse_num = verse_idx + 1
                try:
                    text = bible.get(books=[book.name], chapters=[chapter_num], verses=[verse_num], clean=True)
                except Exception as e:
                    # An 'incorrect header' failure is treated as "verse has no text".
                    if 'incorrect header' in str(e):
                        text = None
                    else:
                        raise
                # if text is not None and version == 'ASV':
                #     text = text.encode('latin-1').decode('cp1252')
                if text is not None:
                    # TIDYUP - trim
                    text = text.strip()
                    if text == '':
                        text = None
                if text is not None:
                    # TIDYUP - remove double spaces. The search string must be TWO spaces: replacing
                    # one space by one space changes nothing, so the loop would never terminate for
                    # any verse that contains a space.
                    while '  ' in text:
                        text = text.replace('  ', ' ')
                    # TIDYUP - replace horrible chars
                    text = text.replace('\u2013', '-')
                    text = text.replace('\u2019', '\'')
                    # NOTE(review): assumed to belong to this if-branch, so that verses without text
                    # are neither stored nor counted - confirm against the original indentation.
                    verses.append({'number': verse_num, 'text': text})
                    verse_count += 1
            chapters.append({'number': chapter_num, 'verses': verses})
            chapter_count += 1
        books.append({
            'name': book.osis_name,
            'verses_per_chapter': book.chapter_lengths,
            'chapters': chapters,
        })
    print()
    report.summary(len(books), chapter_count, verse_count)
    assert chapter_count == 1189
    return {
        'version': version,
        'versionName': lookup_version_name(sword_version),
        'meta': meta,
        'books': books
    }