def do_import(self, bible_name=None):
    """
    Load a Bible from the OSIS XML file named by ``self.filename``.

    The file is parsed with lxml, tags the importer does not use are stripped, and each ``div`` of
    type ``book`` is stored through ``create_book`` and ``create_verse``. Chapter and verse tags are
    handled both when they enclose the text and when they are milestones whose text follows the tag.

    :param bible_name: Forwarded to ``self.get_language`` when the language cannot be determined
        from the file.
    :return: ``True`` when the import completed; ``False`` when the language or a book could not be
        resolved, the file could not be read or parsed, or ``self.stop_import_flag`` was set.
    """
    log.debug('Starting OSIS import from "%s"' % self.filename)
    if not isinstance(self.filename, str):
        self.filename = str(self.filename, 'utf8')
    osis_ns = 'http://www.bibletechnologies.net/2003/OSIS/namespace'
    namespace = {'ns': osis_ns}
    chapter_tag = '{%s}chapter' % osis_ns
    verse_tag = '{%s}verse' % osis_ns
    # Tags that are dropped together with their content.
    removed_tags = ['{%s}%s' % (osis_ns, name) for name in (
        'note', 'milestone', 'title', 'abbr', 'catchWord', 'index', 'rdg', 'rdgGroup', 'figure')]
    # Tags that are dropped while their content is merged into the parent.
    unwrapped_tags = ['{%s}%s' % (osis_ns, name) for name in (
        'p', 'l', 'lg', 'q', 'a', 'w', 'divineName', 'foreign', 'hi', 'inscription', 'mentioned', 'name',
        'reference', 'seg', 'transChange', 'salute', 'signed', 'closer', 'speech', 'speaker', 'list', 'item',
        'table', 'head', 'row', 'cell', 'caption')]
    import_file = None
    success = True
    try:
        # NOTE: We don't need to do any of the normal encoding detection here, because lxml does its own
        # encoding detection, and the two mechanisms together interfere with each other.
        import_file = open(self.filename, 'rb')
        osis_bible_tree = etree.parse(import_file, parser=etree.XMLParser(recover=True))
        # Find bible language
        language_id = None
        language = osis_bible_tree.xpath("//ns:osisText/@xml:lang", namespaces=namespace)
        if language:
            # NOTE(review): the lookup result is used as the language id as-is; confirm that
            # BiblesResourcesDB.get_language returns an id here rather than a record.
            language_id = BiblesResourcesDB.get_language(language[0])
        # The language couldn't be detected, ask the user
        if not language_id:
            language_id = self.get_language(bible_name)
        if not language_id:
            log.error('Importing books from "%s" failed' % self.filename)
            return False
        self.save_meta('language_id', language_id)
        num_books = int(osis_bible_tree.xpath("count(//ns:div[@type='book'])", namespaces=namespace))
        self.wizard.increment_progress_bar(
            translate('BiblesPlugin.OsisImport', 'Removing unused tags (this may take a few minutes)...'))
        # We strip unused tags from the XML, this should leave us with only chapter, verse and div tags.
        # with_tail=False keeps the text that follows a removed element.
        etree.strip_elements(osis_bible_tree, *removed_tags, with_tail=False)
        etree.strip_tags(osis_bible_tree, *unwrapped_tags)
        # Precompile a few xpath queries.
        # NOTE(review): both expressions start with "//", which XPath resolves from the document root even
        # when a context node is supplied, so they describe the whole document rather than the book or
        # chapter passed in - confirm this is intended.
        verse_in_chapter = etree.XPath('count(//ns:chapter[1]/ns:verse)', namespaces=namespace)
        text_in_verse = etree.XPath('count(//ns:verse[1]/text())', namespaces=namespace)
        # Find books in the bible
        bible_books = osis_bible_tree.xpath("//ns:div[@type='book']", namespaces=namespace)
        for book in bible_books:
            if self.stop_import_flag:
                break
            # Remove div-tags in the book
            etree.strip_tags(book, '{%s}div' % osis_ns)
            book_ref_id = self.get_book_ref_id_by_name(book.get('osisID'), num_books, language_id)
            if not book_ref_id:
                book_ref_id = self.get_book_ref_id_by_localised_name(book.get('osisID'))
            if not book_ref_id:
                log.error('Importing books from "%s" failed' % self.filename)
                return False
            book_details = BiblesResourcesDB.get_book_by_id(book_ref_id)
            db_book = self.create_book(book_details['name'], book_ref_id, book_details['testament_id'])
            # Find out if chapter-tags contain the verses, or if they are used as milestone/anchor
            if int(verse_in_chapter(book)) > 0:
                # The chapter tags contain the verses
                for chapter in book:
                    chapter_id = chapter.get("osisID")
                    if not chapter_id:
                        # XML comments and stray children carry no osisID; skip them instead of crashing.
                        continue
                    chapter_number = chapter_id.split('.')[1]
                    # Find out if verse-tags contain the text, or if they are used as milestone/anchor
                    if int(text_in_verse(chapter)) == 0:
                        # verse-tags are used as milestones
                        for verse in chapter:
                            # If this tag marks the start of a verse, the verse text is between this tag and
                            # the next tag, which the "tail" attribute gives us.
                            if verse.get('sID'):
                                verse_number = verse.get("osisID").split('.')[2]
                                verse_text = verse.tail
                                if verse_text:
                                    self.create_verse(db_book.id, chapter_number, verse_number,
                                                      verse_text.strip())
                    else:
                        # Verse-tags contain the text
                        for verse in chapter:
                            verse_id = verse.get("osisID")
                            if not verse_id:
                                # Same guard as for chapters: ignore nodes without an osisID.
                                continue
                            verse_number = verse_id.split('.')[2]
                            if verse.text:
                                self.create_verse(db_book.id, chapter_number, verse_number, verse.text.strip())
                    self.wizard.increment_progress_bar(
                        translate('BiblesPlugin.OsisImport', 'Importing %(bookname)s %(chapter)s...') %
                        {'bookname': db_book.name, 'chapter': chapter_number})
            else:
                # The chapter tags are used as milestones. For now we assume verses are also milestones
                chapter_number = 0
                for element in book:
                    if element.tag == chapter_tag and element.get('sID'):
                        chapter_number = element.get("osisID").split('.')[1]
                        self.wizard.increment_progress_bar(
                            translate('BiblesPlugin.OsisImport', 'Importing %(bookname)s %(chapter)s...') %
                            {'bookname': db_book.name, 'chapter': chapter_number})
                    elif element.tag == verse_tag and element.get('sID'):
                        # If this tag marks the start of a verse, the verse text is between this tag and
                        # the next tag, which the "tail" attribute gives us.
                        verse_number = element.get("osisID").split('.')[2]
                        verse_text = element.tail
                        if verse_text:
                            self.create_verse(db_book.id, chapter_number, verse_number, verse_text.strip())
            self.session.commit()
        self.application.process_events()
    except (ValueError, IOError):
        log.exception('Loading bible from OSIS file failed')
        trace_error_handler(log)
        success = False
    except etree.XMLSyntaxError as e:
        log.exception('Loading bible from OSIS file failed')
        trace_error_handler(log)
        success = False
        # Translate the template first and substitute afterwards; formatting before the lookup would
        # hand translate() a string that is not in the translation catalogue.
        critical_error_message_box(
            message=translate('BiblesPlugin.OsisImport', 'The file is not a valid OSIS-XML file: \n%s') % e.msg)
    finally:
        if import_file:
            import_file.close()
    if self.stop_import_flag:
        return False
    else:
        return success
def do_import(self, bible_name=None):
    """
    Load a Bible from the OSIS file named by ``self.filename``, scanning it line by line.

    The encoding is guessed with chardet from the first 1 MiB of the file, the number of lines is
    used to estimate how many books and chapters to expect, and every line matching
    ``self.verse_regex`` is cleaned of embedded markup and stored through ``create_verse``.

    :param bible_name: Forwarded to ``self.get_language`` when no language was found in the file
        before the first verse.
    :return: ``True`` when at least one verse was imported; ``False`` when the file could not be
        read, the language or a book could not be resolved, no verse matched, or
        ``self.stop_import_flag`` was set.
    """
    log.debug(u'Starting OSIS import from "%s"' % self.filename)
    detect_file = None
    db_book = None
    osis = None
    success = True
    last_chapter = 0
    match_count = 0
    self.wizard.incrementProgressBar(translate('BiblesPlugin.OsisImport',
        'Detecting encoding (this may take a few minutes)...'))
    try:
        detect_file = open(self.filename, u'r')
        details = chardet.detect(detect_file.read(1048576))
        detect_file.seek(0)
        # Count lines lazily instead of materialising the whole file with readlines().
        lines_in_file = sum(1 for _ in detect_file)
    except IOError:
        log.exception(u'Failed to detect OSIS file encoding')
        # Return an explicit bool like every other exit path of this method.
        return False
    finally:
        if detect_file:
            detect_file.close()
    try:
        osis = codecs.open(self.filename, u'r', details['encoding'])
        repl = replacement
        language_id = False
        # Decide whether the bible probably contains only the New Testament, the Old and New
        # Testament, or both testaments plus the Apocrypha.
        if lines_in_file < 11500:
            book_count = 27
            chapter_count = 260
        elif lines_in_file < 34200:
            book_count = 66
            chapter_count = 1188
        else:
            book_count = 67
            chapter_count = 1336
        for file_record in osis:
            if self.stop_import_flag:
                break
            # Try to find the bible language
            if not language_id:
                language_match = self.language_regex.search(file_record)
                if language_match:
                    language = BiblesResourcesDB.get_language(
                        language_match.group(1))
                    if language:
                        language_id = language[u'id']
                        self.save_meta(u'language_id', language_id)
                    continue
            match = self.verse_regex.search(file_record)
            if match:
                # Set meta language_id if not detected till now
                if not language_id:
                    language_id = self.get_language(bible_name)
                    if not language_id:
                        # No exception is active here, so log a plain error rather than a traceback.
                        log.error(u'Importing books from "%s" failed' % self.filename)
                        return False
                match_count += 1
                book = unicode(match.group(1))
                chapter = int(match.group(2))
                verse = int(match.group(3))
                verse_text = match.group(4)
                book_ref_id = self.get_book_ref_id_by_name(book, book_count, language_id)
                if not book_ref_id:
                    log.error(u'Importing books from "%s" failed' % self.filename)
                    return False
                book_details = BiblesResourcesDB.get_book_by_id(book_ref_id)
                if not db_book or db_book.name != book_details[u'name']:
                    log.debug(u'New book: "%s"' % book_details[u'name'])
                    db_book = self.create_book(
                        book_details[u'name'],
                        book_ref_id,
                        book_details[u'testament_id'])
                if last_chapter == 0:
                    self.wizard.progressBar.setMaximum(chapter_count)
                if last_chapter != chapter:
                    # Commit the previous chapter before starting a new one.
                    if last_chapter != 0:
                        self.session.commit()
                    self.wizard.incrementProgressBar(translate('BiblesPlugin.OsisImport',
                        'Importing %s %s...', 'Importing <book name> <chapter>...') %
                        (book_details[u'name'], chapter))
                    last_chapter = chapter
                # All of this rigmarole below is because the mod2osis
                # tool from the Sword library embeds XML in the OSIS
                # but neglects to enclose the verse text (with XML) in
                # <[CDATA[ ]]> tags.
                verse_text = self.note_regex.sub(u'', verse_text)
                verse_text = self.title_regex.sub(u'', verse_text)
                verse_text = self.milestone_regex.sub(u'', verse_text)
                verse_text = self.fi_regex.sub(u'', verse_text)
                verse_text = self.rf_regex.sub(u'', verse_text)
                verse_text = self.lb_regex.sub(u' ', verse_text)
                verse_text = self.lg_regex.sub(u'', verse_text)
                verse_text = self.l_regex.sub(u' ', verse_text)
                verse_text = self.w_regex.sub(u'', verse_text)
                verse_text = self.q1_regex.sub(u'"', verse_text)
                verse_text = self.q2_regex.sub(u'\'', verse_text)
                verse_text = self.q_regex.sub(u'', verse_text)
                verse_text = self.divine_name_regex.sub(repl, verse_text)
                verse_text = self.trans_regex.sub(u'', verse_text)
                verse_text = verse_text.replace(u'</lb>', u'') \
                    .replace(u'</l>', u'').replace(u'<lg>', u'') \
                    .replace(u'</lg>', u'').replace(u'</q>', u'') \
                    .replace(u'</div>', u'').replace(u'</w>', u'')
                verse_text = self.spaces_regex.sub(u' ', verse_text)
                self.create_verse(db_book.id, chapter, verse, verse_text)
                self.application.process_events()
        self.session.commit()
        # A file without a single matching verse line is treated as a failed import.
        if match_count == 0:
            success = False
    except (ValueError, IOError):
        log.exception(u'Loading bible from OSIS file failed')
        success = False
    finally:
        if osis:
            osis.close()
    if self.stop_import_flag:
        return False
    else:
        return success
def do_import(self, bible_name=None):
    """
    Load a Bible from the Zefania XML file named by ``self.filename``.

    The file is parsed with lxml, unused tags are stripped, and every child of the root element that
    carries a ``bname`` or ``bnumber`` attribute is stored as a book through ``create_book`` and
    ``create_verse``.

    :param bible_name: Forwarded to ``self.get_language`` when the language cannot be determined
        from the file.
    :return: ``True`` when the import completed; ``False`` when the language or a book could not be
        resolved, any exception occurred during the import, or ``self.stop_import_flag`` was set.
    """
    log.debug('Starting Zefania import from "%s"' % self.filename)
    if not isinstance(self.filename, str):
        self.filename = str(self.filename, 'utf8')
    import_file = None
    success = True
    try:
        # NOTE: We don't need to do any of the normal encoding detection here, because lxml does its own
        # encoding detection, and the two mechanisms together interfere with each other.
        import_file = open(self.filename, 'rb')
        zefania_bible_tree = etree.parse(import_file, parser=etree.XMLParser(recover=True))
        # Find bible language
        language_id = None
        language = zefania_bible_tree.xpath("/XMLBIBLE/INFORMATION/language/text()")
        if language:
            # NOTE(review): the lookup result is used as the language id as-is; confirm that
            # BiblesResourcesDB.get_language returns an id here rather than a record.
            language_id = BiblesResourcesDB.get_language(language[0])
        # The language couldn't be detected, ask the user
        if not language_id:
            language_id = self.get_language(bible_name)
        if not language_id:
            log.error('Importing books from "%s" failed' % self.filename)
            return False
        self.save_meta('language_id', language_id)
        num_books = int(zefania_bible_tree.xpath("count(//BIBLEBOOK)"))
        # Strip tags we don't use - keep content
        etree.strip_tags(zefania_bible_tree, 'STYLE', 'GRAM', 'NOTE', 'SUP', 'XREF')
        # Strip tags we don't use - remove content (with_tail=False keeps the text after the element)
        etree.strip_elements(zefania_bible_tree, 'PROLOG', 'REMARK', 'CAPTION', 'MEDIA', with_tail=False)
        xmlbible = zefania_bible_tree.getroot()
        for bible_book in xmlbible:
            if self.stop_import_flag:
                break
            bname = bible_book.get('bname')
            bnumber = bible_book.get('bnumber')
            # Children without a name or number are not treated as books.
            if not bname and not bnumber:
                continue
            if bname:
                book_ref_id = self.get_book_ref_id_by_name(bname, num_books, language_id)
                if not book_ref_id:
                    book_ref_id = self.get_book_ref_id_by_localised_name(bname)
            else:
                log.debug('Could not find a name, will use number, basically a guess.')
                book_ref_id = int(bnumber)
            if not book_ref_id:
                log.error('Importing books from "%s" failed' % self.filename)
                return False
            book_details = BiblesResourcesDB.get_book_by_id(book_ref_id)
            db_book = self.create_book(book_details['name'], book_ref_id, book_details['testament_id'])
            for chapter in bible_book:
                if self.stop_import_flag:
                    break
                chapter_number = chapter.get("cnumber")
                for verse in chapter:
                    if verse.text is None:
                        # An empty verse element has no text; skip it instead of failing the whole
                        # import with an AttributeError on None.
                        continue
                    verse_number = verse.get("vnumber")
                    self.create_verse(db_book.id, chapter_number, verse_number, verse.text.replace('<BR/>', '\n'))
                # NOTE(review): the context below is spelled 'Zefnia' while the error message uses
                # 'ZefaniaImport'; left untouched because changing it would alter translation lookups.
                self.wizard.increment_progress_bar(
                    translate('BiblesPlugin.Zefnia', 'Importing %(bookname)s %(chapter)s...') %
                    {'bookname': db_book.name, 'chapter': chapter_number})
            self.session.commit()
        self.application.process_events()
    except Exception as e:
        # Top-level boundary of the import: any failure is reported to the user and logged.
        critical_error_message_box(
            message=translate('BiblesPlugin.ZefaniaImport',
                              'Incorrect Bible file type supplied. Zefania Bibles may be '
                              'compressed. You must decompress them before import.'))
        log.exception(str(e))
        success = False
    finally:
        if import_file:
            import_file.close()
    if self.stop_import_flag:
        return False
    else:
        return success
def do_import(self, bible_name=None):
    """
    Load a Bible from the OSIS XML file named by ``self.filename``.

    The file is parsed with lxml, tags the importer does not use are stripped, and each ``div`` of
    type ``book`` is stored through ``create_book`` and ``create_verse``. Chapter and verse tags are
    handled both when they enclose the text and when they are milestones whose text follows the tag.

    :param bible_name: Forwarded to ``self.get_language`` when the language cannot be determined
        from the file.
    :return: ``True`` when the import completed; ``False`` when the language or a book could not be
        resolved, the file could not be read or parsed, or ``self.stop_import_flag`` was set.
    """
    log.debug('Starting OSIS import from "%s"' % self.filename)
    if not isinstance(self.filename, str):
        self.filename = str(self.filename, 'utf8')
    osis_ns = 'http://www.bibletechnologies.net/2003/OSIS/namespace'
    namespace = {'ns': osis_ns}
    chapter_tag = '{%s}chapter' % osis_ns
    verse_tag = '{%s}verse' % osis_ns
    # Tags that are dropped together with their content.
    removed_tags = ['{%s}%s' % (osis_ns, name) for name in (
        'note', 'milestone', 'title', 'abbr', 'catchWord', 'index', 'rdg', 'rdgGroup', 'figure')]
    # Tags that are dropped while their content is merged into the parent.
    unwrapped_tags = ['{%s}%s' % (osis_ns, name) for name in (
        'p', 'l', 'lg', 'q', 'a', 'w', 'divineName', 'foreign', 'hi', 'inscription', 'mentioned', 'name',
        'reference', 'seg', 'transChange', 'salute', 'signed', 'closer', 'speech', 'speaker', 'list', 'item',
        'table', 'head', 'row', 'cell', 'caption')]
    import_file = None
    success = True
    try:
        # NOTE: We don't need to do any of the normal encoding detection here, because lxml does its own
        # encoding detection, and the two mechanisms together interfere with each other.
        import_file = open(self.filename, 'rb')
        osis_bible_tree = etree.parse(import_file, parser=etree.XMLParser(recover=True))
        # Find bible language
        language_id = None
        language = osis_bible_tree.xpath("//ns:osisText/@xml:lang", namespaces=namespace)
        if language:
            # NOTE(review): the lookup result is used as the language id as-is; confirm that
            # BiblesResourcesDB.get_language returns an id here rather than a record.
            language_id = BiblesResourcesDB.get_language(language[0])
        # The language couldn't be detected, ask the user
        if not language_id:
            language_id = self.get_language(bible_name)
        if not language_id:
            log.error('Importing books from "%s" failed' % self.filename)
            return False
        self.save_meta('language_id', language_id)
        num_books = int(osis_bible_tree.xpath("count(//ns:div[@type='book'])", namespaces=namespace))
        self.wizard.increment_progress_bar(
            translate('BiblesPlugin.OsisImport', 'Removing unused tags (this may take a few minutes)...'))
        # We strip unused tags from the XML, this should leave us with only chapter, verse and div tags.
        # with_tail=False keeps the text that follows a removed element.
        etree.strip_elements(osis_bible_tree, *removed_tags, with_tail=False)
        etree.strip_tags(osis_bible_tree, *unwrapped_tags)
        # Precompile a few xpath queries.
        # NOTE(review): both expressions start with "//", which XPath resolves from the document root even
        # when a context node is supplied, so they describe the whole document rather than the book or
        # chapter passed in - confirm this is intended.
        verse_in_chapter = etree.XPath('count(//ns:chapter[1]/ns:verse)', namespaces=namespace)
        text_in_verse = etree.XPath('count(//ns:verse[1]/text())', namespaces=namespace)
        # Find books in the bible
        bible_books = osis_bible_tree.xpath("//ns:div[@type='book']", namespaces=namespace)
        for book in bible_books:
            if self.stop_import_flag:
                break
            # Remove div-tags in the book
            etree.strip_tags(book, '{%s}div' % osis_ns)
            book_ref_id = self.get_book_ref_id_by_name(book.get('osisID'), num_books, language_id)
            if not book_ref_id:
                book_ref_id = self.get_book_ref_id_by_localised_name(book.get('osisID'))
            if not book_ref_id:
                log.error('Importing books from "%s" failed' % self.filename)
                return False
            book_details = BiblesResourcesDB.get_book_by_id(book_ref_id)
            db_book = self.create_book(book_details['name'], book_ref_id, book_details['testament_id'])
            # Find out if chapter-tags contain the verses, or if they are used as milestone/anchor
            if int(verse_in_chapter(book)) > 0:
                # The chapter tags contain the verses
                for chapter in book:
                    chapter_id = chapter.get("osisID")
                    if not chapter_id:
                        # XML comments and stray children carry no osisID; skip them instead of crashing.
                        continue
                    chapter_number = chapter_id.split('.')[1]
                    # Find out if verse-tags contain the text, or if they are used as milestone/anchor
                    if int(text_in_verse(chapter)) == 0:
                        # verse-tags are used as milestones
                        for verse in chapter:
                            # If this tag marks the start of a verse, the verse text is between this tag and
                            # the next tag, which the "tail" attribute gives us.
                            if verse.get('sID'):
                                verse_number = verse.get("osisID").split('.')[2]
                                verse_text = verse.tail
                                if verse_text:
                                    self.create_verse(db_book.id, chapter_number, verse_number,
                                                      verse_text.strip())
                    else:
                        # Verse-tags contain the text
                        for verse in chapter:
                            verse_id = verse.get("osisID")
                            if not verse_id:
                                # Same guard as for chapters: ignore nodes without an osisID.
                                continue
                            verse_number = verse_id.split('.')[2]
                            if verse.text:
                                self.create_verse(db_book.id, chapter_number, verse_number, verse.text.strip())
                    self.wizard.increment_progress_bar(
                        translate('BiblesPlugin.OsisImport', 'Importing %(bookname)s %(chapter)s...') %
                        {'bookname': db_book.name, 'chapter': chapter_number})
            else:
                # The chapter tags are used as milestones. For now we assume verses are also milestones
                chapter_number = 0
                for element in book:
                    if element.tag == chapter_tag and element.get('sID'):
                        chapter_number = element.get("osisID").split('.')[1]
                        self.wizard.increment_progress_bar(
                            translate('BiblesPlugin.OsisImport', 'Importing %(bookname)s %(chapter)s...') %
                            {'bookname': db_book.name, 'chapter': chapter_number})
                    elif element.tag == verse_tag and element.get('sID'):
                        # If this tag marks the start of a verse, the verse text is between this tag and
                        # the next tag, which the "tail" attribute gives us.
                        verse_number = element.get("osisID").split('.')[2]
                        verse_text = element.tail
                        if verse_text:
                            self.create_verse(db_book.id, chapter_number, verse_number, verse_text.strip())
            self.session.commit()
        self.application.process_events()
    except (ValueError, IOError):
        log.exception('Loading bible from OSIS file failed')
        trace_error_handler(log)
        success = False
    except etree.XMLSyntaxError as e:
        log.exception('Loading bible from OSIS file failed')
        trace_error_handler(log)
        success = False
        # Translate the template first and substitute afterwards; formatting before the lookup would
        # hand translate() a string that is not in the translation catalogue.
        critical_error_message_box(
            message=translate('BiblesPlugin.OsisImport', 'The file is not a valid OSIS-XML file: \n%s') % e.msg)
    finally:
        if import_file:
            import_file.close()
    if self.stop_import_flag:
        return False
    else:
        return success