def strip_rtf_charsets_test(self):
    """
    Test that the strip_rtf() method properly decodes the supported charsets.
    """
    # Each entry is (font charset number, RTF-escaped input, expected plain text).
    charset_cases = [
        ('0', 'weor\\\'F0-myndum \\\'FEah\\par ', 'weorð-myndum þah\n'),
        ('128', '\\\'83C\\\'83G\\\'83X\\\'A5\\\'83L\\\'83\\\'8A\\\'83X\\\'83g\\\'A1 '
                '\\\\ \\\'95\\\\ \\\'8E\\} \\\'8E\\{ \\\'A1\\par ', 'イエス・キリスト。 ¥ 表 枝 施 。\n'),
        ('129', '\\\'BF\\\'B9\\\'BC\\\'F6 \\\'B1\\\'D7\\\'B8\\\'AE\\\'BD\\\'BA\\\'B5\\\'B5\\par ', '예수 그리스도\n'),
        ('134', '\\\'D2\\\'AE\\\'F6\\\'D5\\\'BB\\\'F9\\\'B6\\\'BD\\\'CA\\\'C7\\\'D6\\\'F7\\par ', '耶稣基督是主\n'),
        ('161', '\\\'D7\\\'F1\\\'E9\\\'F3\\\'F4\\\'FC\\\'F2\\par ', 'Χριστός\n'),
        ('162', 'Hazreti \\\'DDsa\\par ', 'Hazreti İsa\n'),
        ('163', 'ph\\\'FD\\\'F5ng\\par ', 'phương\n'),
        ('177', '\\\'E1\\\'F8\\\'E0\\\'F9\\\'E9\\\'FA\\par ', 'בראשית\n'),
        ('178', '\\\'ED\\\'D3\\\'E6\\\'DA \\\'C7\\\'E1\\\'E3\\\'D3\\\'ED\\\'CD\\par ', 'يسوع المسيح\n'),
        ('186', 'J\\\'EBzus Kristus yra Vie\\\'F0pats\\par ', 'Jėzus Kristus yra Viešpats\n'),
        ('204', '\\\'D0\\\'EE\\\'F1\\\'F1\\\'E8\\\'FF\\par ', 'Россия\n'),
        ('222', '\\\'A4\\\'C3\\\'D4\\\'CA\\\'B5\\\'EC\\par ', 'คริสต์\n'),
        ('238', 'Z\\\'E1v\\\'ECre\\\'E8n\\\'E1 zkou\\\'9Aka\\par ', 'Závěrečná zkouška\n')
    ]
    # Minimal RTF document; the two placeholders take the charset number and the escaped text.
    rtf_template = (
        '{\\rtf1 \\ansi \\ansicpg1252 {\\fonttbl \\f0 \\fswiss \\fcharset%s Helvetica;}'
        '{\\colortbl ;\\red0 \\green0 \\blue0 ;}\\pard \\f0 %s}')
    # GIVEN: For each character set and input
    for charset, rtf_text, exp_result in charset_cases:
        # WHEN: We call strip_rtf on the input RTF
        stripped, _ = strip_rtf(rtf_template % (charset, rtf_text))
        # THEN: The stripped text matches the expected result
        assert stripped == exp_result, 'The result should be %s' % exp_result
def strip_rtf_charsets_test(self):
    """
    Test that the strip_rtf() method properly decodes the supported charsets.
    """
    # Rows of (font charset number, RTF-escaped source text, text expected after stripping).
    cases = [
        ('0', 'weor\\\'F0-myndum \\\'FEah\\par ', 'weorð-myndum þah\n'),
        ('128', '\\\'83C\\\'83G\\\'83X\\\'A5\\\'83L\\\'83\\\'8A\\\'83X\\\'83g\\\'A1 '
                '\\\\ \\\'95\\\\ \\\'8E\\} \\\'8E\\{ \\\'A1\\par ', 'イエス・キリスト。 ¥ 表 枝 施 。\n'),
        ('129', '\\\'BF\\\'B9\\\'BC\\\'F6 \\\'B1\\\'D7\\\'B8\\\'AE\\\'BD\\\'BA\\\'B5\\\'B5\\par ', '예수 그리스도\n'),
        ('134', '\\\'D2\\\'AE\\\'F6\\\'D5\\\'BB\\\'F9\\\'B6\\\'BD\\\'CA\\\'C7\\\'D6\\\'F7\\par ', '耶稣基督是主\n'),
        ('161', '\\\'D7\\\'F1\\\'E9\\\'F3\\\'F4\\\'FC\\\'F2\\par ', 'Χριστός\n'),
        ('162', 'Hazreti \\\'DDsa\\par ', 'Hazreti İsa\n'),
        ('163', 'ph\\\'FD\\\'F5ng\\par ', 'phương\n'),
        ('177', '\\\'E1\\\'F8\\\'E0\\\'F9\\\'E9\\\'FA\\par ', 'בראשית\n'),
        ('178', '\\\'ED\\\'D3\\\'E6\\\'DA \\\'C7\\\'E1\\\'E3\\\'D3\\\'ED\\\'CD\\par ', 'يسوع المسيح\n'),
        ('186', 'J\\\'EBzus Kristus yra Vie\\\'F0pats\\par ', 'Jėzus Kristus yra Viešpats\n'),
        ('204', '\\\'D0\\\'EE\\\'F1\\\'F1\\\'E8\\\'FF\\par ', 'Россия\n'),
        ('222', '\\\'A4\\\'C3\\\'D4\\\'CA\\\'B5\\\'EC\\par ', 'คริสต์\n'),
        ('238', 'Z\\\'E1v\\\'ECre\\\'E8n\\\'E1 zkou\\\'9Aka\\par ', 'Závěrečná zkouška\n')
    ]
    header = '{\\rtf1 \\ansi \\ansicpg1252 {\\fonttbl \\f0 \\fswiss \\fcharset%s Helvetica;}'
    body = '{\\colortbl ;\\red0 \\green0 \\blue0 ;}\\pard \\f0 %s}'
    # GIVEN: For each character set and input
    for case in cases:
        charset, escaped_text, exp_result = case
        document = (header + body) % (charset, escaped_text)
        # WHEN: We call strip_rtf on the input RTF
        plain_text, _ = strip_rtf(document)
        # THEN: The stripped text matches the expected result
        assert plain_text == exp_result, 'The result should be %s' % exp_result
def processSection(self, tag, text):
    """
    Process a section of the song, i.e. title, verse etc.

    :param tag: The section tag; selects which song attribute ``text`` is stored in.
    :param text: The section content. Text containing ``rtf1`` is passed through ``strip_rtf()`` first.
    """
    # Title and end-of-song sections are handled even when the text is empty.
    if tag == u'T':
        self.setDefaults()
        if text:
            self.title = text
        return
    if tag == u'E':
        self.finish()
        return
    if u'rtf1' in text:
        stripped = strip_rtf(text, self.encoding)
        if stripped is None:
            return
        text, self.encoding = stripped
        text = text.rstrip()
    if not text:
        return
    if tag == u'A':
        self.parseAuthor(text)
    elif tag in [u'B', u'C']:
        self.addVerse(text, tag)
    elif tag == u'D':
        self.addVerse(text, u'E')
    elif tag == u'G':
        self.topics.append(text)
    elif tag == u'M':
        # The last run of digits is the song number; everything before it is the song book name.
        numbers = re.findall(r'\d+', text)
        if numbers:
            self.songNumber = numbers[-1]
            self.songBookName = text[:text.rfind(self.songNumber)]
    elif tag == u'N':
        self.comments = text
    elif tag == u'O':
        # Each character of the order string names one section; unknown characters are skipped.
        fixed_entries = {u'C': u'C1', u'B': u'B1', u'D': u'E1'}
        for char in text:
            if char in fixed_entries:
                self.verseOrderList.append(fixed_entries[char])
            elif u'1' <= char <= u'7':
                self.verseOrderList.append(u'V' + char)
    elif tag == u'R':
        self.addCopyright(text)
    elif u'1' <= tag <= u'7':
        # NOTE(review): for a single-character tag, tag[1:] is empty, so the verse is labelled just 'V' -
        # confirm this is intended rather than u'V' + tag.
        self.addVerse(text, u'V' + tag[1:])
def process_section(self, tag, text):
    """
    Process a section of the song, i.e. title, verse etc.

    :param tag: The section tag; selects which song attribute ``text`` is stored in.
    :param text: The section content. Text containing ``rtf1`` is passed through ``strip_rtf()`` first.
    """
    # Title and end-of-song sections are handled even when the text is empty.
    if tag == 'T':
        self.set_defaults()
        if text:
            self.title = text
        return
    if tag == 'E':
        self.finish()
        return
    if 'rtf1' in text:
        stripped = strip_rtf(text, self.encoding)
        if stripped is None:
            return
        text, self.encoding = stripped
        text = text.rstrip()
    if not text:
        return
    if tag == 'A':
        self.parse_author(text)
    elif tag in ['B', 'C']:
        self.add_verse(text, tag)
    elif tag == 'D':
        self.add_verse(text, 'E')
    elif tag == 'G':
        self.topics.append(text)
    elif tag == 'M':
        # The last run of digits is the song number; everything before it is the song book name.
        numbers = re.findall(r'\d+', text)
        if numbers:
            self.song_number = numbers[-1]
            self.song_book_name = text[:text.rfind(self.song_number)]
    elif tag == 'N':
        self.comments = text
    elif tag == 'O':
        # Each character of the order string names one section; unknown characters are skipped.
        fixed_entries = {'C': 'C1', 'B': 'B1', 'D': 'E1'}
        for char in text:
            if char in fixed_entries:
                self.verse_order_list.append(fixed_entries[char])
            elif '1' <= char <= '7':
                self.verse_order_list.append('V' + char)
    elif tag == 'R':
        self.add_copyright(text)
    elif '1' <= tag <= '7':
        # NOTE(review): for a single-character tag, tag[1:] is empty, so the verse is labelled just 'V' -
        # confirm this is intended rather than 'V' + tag.
        self.add_verse(text, 'V' + tag[1:])
def process_song(self, root, filename):
    """
    Import a single ProPresenter 4 document as a song.

    The title comes from the file name, the copyright, notes, CCLI number and authors from attributes on
    ``root``, and one verse is added for every slide that carries RTF text.

    :param root: Parsed document root; read through ``root.get()`` and ``root.slides.RVDisplaySlide``.
    :param filename: Path of the file being imported; its base name without ``.pro4`` becomes the title.
    """
    self.set_defaults()
    # str.rstrip('.pro4') removes any trailing '.', 'p', 'r', 'o' and '4' characters rather than the
    # suffix (e.g. 'Zoo.pro4' -> 'Z'), so cut the extension off explicitly.
    title = os.path.basename(filename)
    extension = '.pro4'
    if title.endswith(extension):
        title = title[:-len(extension)]
    self.title = title
    self.copyright = root.get('CCLICopyrightInfo')
    self.comments = root.get('notes')
    self.ccli_number = root.get('CCLILicenseNumber')
    for author_key in ['author', 'artist', 'CCLIArtistCredits']:
        author = root.get(author_key)
        # root.get() may hand back None (presumably when the attribute is absent); len(None) would raise.
        if author:
            self.parse_author(author)
    # Slides are numbered from 1, and image-only slides still consume a number.
    for count, slide in enumerate(root.slides.RVDisplaySlide, start=1):
        if not hasattr(slide.displayElements, 'RVTextElement'):
            log.debug('No text found, may be an image slide')
            continue
        rtf_data = slide.displayElements.RVTextElement.get('RTFData')
        rtf = base64.standard_b64decode(rtf_data)
        # strip_rtf() also returns an encoding, which is not needed here.
        words, _ = strip_rtf(rtf.decode())
        self.add_verse(words, "v%d" % count)
    if not self.finish():
        self.log_error(self.import_source)
def process_song(self, root, filename):
    """
    Import one ProPresenter document (version 4, 5 or 6) as a song.

    Title, notes, authors and CCLI details are read from attributes on ``root``; one verse is added for every
    slide that carries RTF text. Documents whose ``versionNumber`` is missing or not an integer are skipped.

    :param root: Parsed document root; attributes are read with ``root.get()``, slides via attribute access.
    :param filename: Path of the file being imported; used for the title when ``CCLISongTitle`` is empty.
    """
    self.set_defaults()
    # Extract ProPresenter versionNumber
    try:
        self.version = int(root.get('versionNumber'))
    except (ValueError, TypeError):
        # int(None) raises TypeError, so a missing attribute is handled like an unparsable one.
        log.debug('ProPresenter versionNumber invalid or missing')
        return
    # Title
    self.title = root.get('CCLISongTitle')
    if not self.title:
        self.title = os.path.basename(filename)
        # Drop a five-character extension of the form '.pro?' (e.g. '.pro4', '.pro6').
        if self.title[-5:-1] == '.pro':
            self.title = self.title[:-5]
    # Notes
    self.comments = root.get('notes')
    # Author
    for author_key in ['author', 'CCLIAuthor', 'artist', 'CCLIArtistCredits']:
        author = root.get(author_key)
        if author:
            self.parse_author(author)
    # ProPresenter 4
    if 400 <= self.version < 500:
        self.copyright = root.get('CCLICopyrightInfo')
        self.ccli_number = root.get('CCLILicenseNumber')
        for count, slide in enumerate(root.slides.RVDisplaySlide, start=1):
            if not hasattr(slide.displayElements, 'RVTextElement'):
                log.debug('No text found, may be an image slide')
                continue
            rtf_data = slide.displayElements.RVTextElement.get('RTFData')
            rtf = base64.standard_b64decode(rtf_data)
            words, _ = strip_rtf(rtf.decode())
            self.add_verse(words, "v{count}".format(count=count))
    # ProPresenter 5
    elif 500 <= self.version < 600:
        self.copyright = root.get('CCLICopyrightInfo')
        self.ccli_number = root.get('CCLILicenseNumber')
        # The slide counter runs across all groups.
        count = 0
        for group in root.groups.RVSlideGrouping:
            for slide in group.slides.RVDisplaySlide:
                count += 1
                if not hasattr(slide.displayElements, 'RVTextElement'):
                    log.debug('No text found, may be an image slide')
                    continue
                rtf_data = slide.displayElements.RVTextElement.get('RTFData')
                rtf = base64.standard_b64decode(rtf_data)
                words, _ = strip_rtf(rtf.decode())
                self.add_verse(words, "v{count:d}".format(count=count))
    # ProPresenter 6
    elif 600 <= self.version < 700:
        self.copyright = root.get('CCLICopyrightYear')
        self.ccli_number = root.get('CCLISongNumber')
        count = 0
        for group in root.array.RVSlideGrouping:
            for slide in group.array.RVDisplaySlide:
                count += 1
                for item in slide.array:
                    if item.get('rvXMLIvarName') != "displayElements":
                        continue
                    if not hasattr(item, 'RVTextElement'):
                        log.debug('No text found, may be an image slide')
                        continue
                    # Reset per text element so an element without RTF data can neither leave ``words``
                    # unbound nor reuse the text of a previous element.
                    words = None
                    for contents in item.RVTextElement.NSString:
                        # Only the RTF payload is decoded; other strings are not needed.
                        if contents.get('rvXMLIvarName') == "RTFData":
                            data = base64.standard_b64decode(contents.text)
                            words, _ = strip_rtf(data.decode())
                            break
                    if words:
                        self.add_verse(words, "v{count:d}".format(count=count))
    if not self.finish():
        self.log_error(self.import_source)
def set_song_import_object(self, authors, words):
    """
    Set the SongImport object members.

    :param authors: String with authors, separated by '/', ';' or ','
    :param words: Bytes with rtf-encoding
    """
    if authors:
        # Try the separators in order of preference; if none splits the string, the last result is used.
        for separator in ('/', ';', ','):
            author_list = authors.split(separator)
            if len(author_list) >= 2:
                break
        for author_name in author_list:
            self.add_author(author_name.strip())
    if words:
        # Format the lyrics
        try:
            decoded_words = words.decode()
        except UnicodeDecodeError:
            # The unicode chars in the rtf was not escaped in the expected manner
            self.entry_error_log = translate('SongsPlugin.EasyWorshipSongImport', 'Unexpected data formatting.')
            return
        stripped = strip_rtf(decoded_words, self.encoding)
        if stripped is None:
            self.entry_error_log = translate('SongsPlugin.EasyWorshipSongImport', 'No song text found.')
            return
        words, self.encoding = stripped
        verse_type = VerseType.tags[VerseType.Verse]
        # EW tags: verse, chorus, pre-chorus, bridge, tag,
        # intro, ending, slide
        known_tags = [name.lower() for name in VerseType.names + ['tag', 'slide', 'end']]
        for verse in SLIDE_BREAK_REGEX.split(words):
            verse = verse.strip()
            if not verse:
                continue
            verse_split = verse.split('\n', 1)
            ew_tag = verse_split[0].strip().lower()
            first_line_is_tag = False
            for tag in known_tags:
                if not ew_tag.startswith(tag):
                    continue
                # The verse type is the first letter of the tag, except for 'tag' and 'slide'.
                if tag in ('tag', 'slide'):
                    verse_type = VerseType.tags[VerseType.Other]
                else:
                    verse_type = tag[0]
                first_line_is_tag = True
                number_found = False
                # check if tag is followed by number and/or note
                if len(ew_tag) > len(tag):
                    number_match = NUMBER_REGEX.search(ew_tag)
                    if number_match:
                        verse_type += number_match.group()
                        number_found = True
                    if NOTE_REGEX.search(ew_tag):
                        self.comments += ew_tag + '\n'
                if not number_found:
                    verse_type += '1'
                break
            # If the verse only consist of the tag-line, add an empty line to create an empty slide
            if first_line_is_tag and len(verse_split) == 1:
                verse_split.append("")
            if first_line_is_tag:
                verse_text = verse_split[-1].strip()
            else:
                verse_text = verse
            self.add_verse(verse_text, verse_type)
    if len(self.comments) > 5:
        self.comments += str(translate('SongsPlugin.EasyWorshipSongImport',
                                       '\n[above are Song Tags with notes imported from EasyWorship]'))
def set_song_import_object(self, authors, words):
    """
    Set the SongImport object members.

    :param authors: String with authors, separated by '/', ';' or ','
    :param words: Bytes with rtf-encoding
    """
    if authors:
        # Split up the authors
        author_list = authors.split('/')
        if len(author_list) < 2:
            author_list = authors.split(';')
        if len(author_list) < 2:
            author_list = authors.split(',')
        for author_name in author_list:
            self.add_author(author_name.strip())
    if words:
        # Format the lyrics
        try:
            decoded_words = words.decode()
        except UnicodeDecodeError:
            # The unicode chars in the rtf was not escaped in the expected manner
            self.entry_error_log = translate(
                'SongsPlugin.EasyWorshipSongImport', 'Unexpected data formatting.')
            return
        result = strip_rtf(decoded_words, self.encoding)
        if result is None:
            self.entry_error_log = translate(
                'SongsPlugin.EasyWorshipSongImport', 'No song text found.')
            return
        words, self.encoding = result
        verse_type = VerseType.tags[VerseType.Verse]
        # EW tags: verse, chorus, pre-chorus, bridge, tag,
        # intro, ending, slide
        # Match against the full tag names: the single-letter VerseType.tags would treat any lyric line
        # that merely starts with one of those letters as a tag line.
        known_tags = [name.lower() for name in VerseType.names + ['tag', 'slide', 'end']]
        for verse in SLIDE_BREAK_REGEX.split(words):
            verse = verse.strip()
            if not verse:
                continue
            verse_split = verse.split('\n', 1)
            ew_tag = verse_split[0].strip().lower()
            first_line_is_tag = False
            for tag in known_tags:
                if ew_tag.startswith(tag):
                    # The verse type is the first letter of the tag, except for 'tag' and 'slide'.
                    verse_type = tag[0]
                    if tag in ('tag', 'slide'):
                        verse_type = VerseType.tags[VerseType.Other]
                    first_line_is_tag = True
                    number_found = False
                    # check if tag is followed by number and/or note
                    if len(ew_tag) > len(tag):
                        match = NUMBER_REGEX.search(ew_tag)
                        if match:
                            verse_type += match.group()
                            number_found = True
                        match = NOTE_REGEX.search(ew_tag)
                        if match:
                            self.comments += ew_tag + '\n'
                    if not number_found:
                        verse_type += '1'
                    break
            self.add_verse(
                verse_split[-1].strip() if first_line_is_tag else verse,
                verse_type)
    if len(self.comments) > 5:
        self.comments += str(
            translate(
                'SongsPlugin.EasyWorshipSongImport',
                '\n[above are Song Tags with notes imported from EasyWorship]'
            ))
def doImport(self):
    """
    Import every song record from an EasyWorship database.

    ``self.import_source`` names the ``.DB`` file; the matching ``.MB`` file is opened alongside it and
    stored in ``self.memoFile``. Returns silently when either file is missing, the DB file is smaller than
    0x800 bytes, the header does not look like a Paradox table, or no usable encoding is found. Both files are
    closed on every exit path.
    """
    # Open the DB and MB files if they exist
    import_source_mb = self.import_source.replace('.DB', '.MB')
    if not os.path.isfile(self.import_source) or not os.path.isfile(import_source_mb):
        return
    db_size = os.path.getsize(self.import_source)
    if db_size < 0x800:
        return
    # The context managers close both files even on the early returns further down.
    with open(self.import_source, 'rb') as db_file, open(import_source_mb, 'rb') as memo_file:
        self.memoFile = memo_file
        # Don't accept files that are clearly not paradox files
        record_size, header_size, block_size, first_block, num_fields = struct.unpack(
            '<hhxb8xh17xh', db_file.read(35))
        if header_size != 0x800 or block_size < 1 or block_size > 4:
            return
        # Take a stab at how text is encoded
        self.encoding = 'cp1252'
        db_file.seek(106)
        code_page, = struct.unpack('<h', db_file.read(2))
        # Only the 852 mapping has been observed; the others are merely guessed.
        # Actual example files are needed.
        code_page_encodings = {
            852: 'cp1250',
            737: 'cp1253',
            775: 'cp1257',
            855: 'cp1251',
            857: 'cp1254',
            866: 'cp1251',
            869: 'cp1253',
            862: 'cp1255',
            874: 'cp874',
        }
        self.encoding = code_page_encodings.get(code_page, self.encoding)
        self.encoding = retrieve_windows_encoding(self.encoding)
        if not self.encoding:
            return
        # Read the field description information
        db_file.seek(120)
        field_info = db_file.read(num_fields * 2)
        db_file.seek(4 + (num_fields * 4) + 261, os.SEEK_CUR)
        # NOTE(review): the file is opened in binary mode, so on Python 3 this splits bytes with a str
        # separator - confirm which Python version this importer targets before changing it.
        field_names = db_file.read(header_size - db_file.tell()).split('\0', num_fields)
        field_names.pop()
        field_descs = []
        for i, field_name in enumerate(field_names):
            field_type, field_size = struct.unpack_from('BB', field_info, i * 2)
            field_descs.append(FieldDescEntry(field_name, field_type, field_size))
        self.setRecordStruct(field_descs)
        # Pick out the field description indexes we will need
        try:
            success = True
            fi_title = self.findField('Title')
            fi_author = self.findField('Author')
            fi_copy = self.findField('Copyright')
            fi_admin = self.findField('Administrator')
            fi_words = self.findField('Words')
            fi_ccli = self.findField('Song Number')
        except IndexError:
            # This is the wrong table
            success = False
        # There does not appear to be a _reliable_ way of getting the number of songs/records, so loop through
        # the file blocks and total the number of records. Store the information in a list so we dont have to
        # do all this again.
        cur_block = first_block
        total_count = 0
        block_list = []
        while cur_block != 0 and success:
            cur_block_pos = header_size + ((cur_block - 1) * 1024 * block_size)
            db_file.seek(cur_block_pos)
            cur_block, rec_count = struct.unpack('<h2xh', db_file.read(6))
            # Floor division keeps the count an int; range() below rejects the float that '/' yields on
            # Python 3.
            rec_count = (rec_count + record_size) // record_size
            block_list.append((cur_block_pos, rec_count))
            total_count += rec_count
        self.import_wizard.progress_bar.setMaximum(total_count)
        for cur_block_pos, rec_count in block_list:
            db_file.seek(cur_block_pos + 6)
            # Loop through each record within the current block
            for _ in range(rec_count):
                if self.stop_import_flag:
                    break
                raw_record = db_file.read(record_size)
                self.fields = self.recordStruct.unpack(raw_record)
                self.setDefaults()
                self.title = self.getField(fi_title)
                # Get remaining fields.
                copy = self.getField(fi_copy)
                admin = self.getField(fi_admin)
                ccli = self.getField(fi_ccli)
                authors = self.getField(fi_author)
                words = self.getField(fi_words)
                # Set the SongImport object members.
                if copy:
                    self.copyright = copy
                if admin:
                    if copy:
                        self.copyright += ', '
                    self.copyright += translate('SongsPlugin.EasyWorshipSongImport',
                                                'Administered by %s') % admin
                if ccli:
                    self.ccliNumber = ccli
                if authors:
                    # Split up the authors
                    author_list = authors.split('/')
                    if len(author_list) < 2:
                        author_list = authors.split(';')
                    if len(author_list) < 2:
                        author_list = authors.split(',')
                    for author_name in author_list:
                        self.addAuthor(author_name.strip())
                if words:
                    # Format the lyrics
                    result = strip_rtf(words, self.encoding)
                    if result is None:
                        return
                    words, self.encoding = result
                    verse_type = VerseType.tags[VerseType.Verse]
                    # EW tags: verse, chorus, pre-chorus, bridge, tag,
                    # intro, ending, slide
                    # Match against the full tag names: the single-letter VerseType.tags would treat any
                    # lyric line that merely starts with one of those letters as a tag line.
                    known_tags = [name.lower() for name in VerseType.names + ['tag', 'slide', 'end']]
                    for verse in SLIDE_BREAK_REGEX.split(words):
                        verse = verse.strip()
                        if not verse:
                            continue
                        verse_split = verse.split('\n', 1)
                        ew_tag = verse_split[0].strip().lower()
                        first_line_is_tag = False
                        for tag in known_tags:
                            if ew_tag.startswith(tag):
                                verse_type = tag[0]
                                if tag in ('tag', 'slide'):
                                    verse_type = VerseType.tags[VerseType.Other]
                                first_line_is_tag = True
                                number_found = False
                                # check if tag is followed by number and/or note
                                if len(ew_tag) > len(tag):
                                    match = NUMBER_REGEX.search(ew_tag)
                                    if match:
                                        verse_type += match.group()
                                        number_found = True
                                    match = NOTE_REGEX.search(ew_tag)
                                    if match:
                                        self.comments += ew_tag + '\n'
                                if not number_found:
                                    verse_type += '1'
                                break
                        self.addVerse(verse_split[-1].strip() if first_line_is_tag else verse, verse_type)
                if len(self.comments) > 5:
                    self.comments += str(translate('SongsPlugin.EasyWorshipSongImport',
                                                   '\n[above are Song Tags with notes imported from EasyWorship]'))
                if self.stop_import_flag:
                    break
                if not self.finish():
                    self.logError(self.import_source)
def parse(self, data, cell=False):
    """
    Process the records

    :param data: The bytes to be processed: ``#name: value`` pairs inside a group marked as ``[]``.
    :param cell: False for the main group. When parsing a nested ``CELL...`` group this is the part of the
        name after ``CELL``; a truthy value makes the pairs be handled as verse data.
    :return: False if the data is malformed or the RTF of a verse cannot be stripped, otherwise True.
    """
    malformed = not cell and (not len(data) or data[0:1] != b'[' or data.strip()[-1:] != b']')
    if malformed:
        self.log_error('File is malformed')
        return False
    i = 1
    verse_type = VerseType.tags[VerseType.Verse]
    while i < len(data):
        # Data is held as #name: value pairs inside groups marked as [].
        # Now we are looking for the name.
        if data[i:i + 1] != b'#':
            i += 1
            continue
        name_end = data.find(b':', i + 1)
        name = data[i + 1:name_end].decode(self.encoding).upper()
        i = name_end + 1
        while data[i:i + 1] == b' ':
            i += 1
        first = data[i:i + 1]
        if first == b'"':
            # Quoted value: everything up to the next quote.
            end = data.find(b'"', i + 1)
            value = data[i + 1:end]
        elif first == b'[':
            # Nested group: runs to the first closing bracket that is not inside quotes.
            inside_quotes = False
            for j in range(i, len(data)):
                char = data[j:j + 1]
                if char == b'"':
                    inside_quotes = not inside_quotes
                elif not inside_quotes and char == b']':
                    end = j + 1
                    break
            value = data[i:end]
        else:
            # Bare value: ends at the next comma, or at the closing parenthesis if one is opened first.
            end = data.find(b',', i + 1)
            if data.find(b'(', i, end) != -1:
                end = data.find(b')', i) + 1
            value = data[i:end]
        if not cell:
            # If we are in the main group.
            if name == 'TITLE':
                self.title = self.decode(self.unescape(value))
            elif name == 'AUTHOR':
                author = self.decode(self.unescape(value))
                if len(author):
                    self.add_author(author)
            elif name == 'COPYRIGHT':
                self.add_copyright(self.decode(self.unescape(value)))
            elif name[0:4] == 'CELL':
                self.parse(value, cell=name[4:])
        else:
            # We are in a verse group.
            if name == 'MARKER_NAME':
                value = self.decode(value).strip()
                if len(value):
                    verse_type = VerseType.tags[VerseType.from_loose_input(value[0])]
                    if len(value) >= 2 and value[-1] in '0123456789':
                        verse_type = "{verse}{value}".format(verse=verse_type, value=value[-1])
            elif name == 'HOTKEY':
                value = self.decode(value).strip()
                # HOTKEY always appears after MARKER_NAME, so it
                # effectively overrides MARKER_NAME, if present.
                if len(value) and value in HOTKEY_TO_VERSE_TYPE:
                    verse_type = HOTKEY_TO_VERSE_TYPE[value]
            if name == 'RTF':
                value = self.decode(self.unescape(value))
                result = strip_rtf(value, self.encoding)
                if result is None:
                    return False
                verse, self.encoding = result
                # If any line inside any verse contains CCLI or
                # only Public Domain, we treat this as special data:
                # we remove that line and add data to specific field.
                processed_lines = []
                for raw_line in verse.strip().split('\n'):
                    line = raw_line.strip()
                    if line[:3].lower() == 'ccl':
                        number = re.search(r'[0-9]+', line)
                        if number:
                            self.ccli_number = int(number.group(0))
                            continue
                    elif line.lower() == 'public domain':
                        self.add_copyright('Public Domain')
                        continue
                    processed_lines.append(line)
                self.add_verse('\n'.join(processed_lines).strip(), verse_type)
        if end == -1:
            break
        # Continue two positions past ``end``, i.e. beyond the value and the character following it.
        i = end + 2
    return True
def process_song(self, root, file_path):
    """
    Import one ProPresenter document (version 4, 5 or 6) as a song.

    :param root: Parsed document root; attributes are read with ``root.get()``, slides via attribute access.
    :param pathlib.Path file_path: Path to the file thats being imported
    :rtype: None
    """
    self.set_defaults()
    # Extract ProPresenter versionNumber
    try:
        self.version = int(root.get('versionNumber'))
    except (ValueError, TypeError):
        log.debug('ProPresenter versionNumber invalid or missing')
        return
    # Title: fall back to the file name without its extension
    self.title = root.get('CCLISongTitle')
    if not self.title:
        self.title = file_path.stem
    # Notes
    self.comments = root.get('notes')
    # Author
    for author_key in ('author', 'CCLIAuthor', 'artist', 'CCLIArtistCredits'):
        author = root.get(author_key)
        if author:
            self.parse_author(author)
    version = self.version
    if 400 <= version < 500:
        # ProPresenter 4
        self.copyright = root.get('CCLICopyrightInfo')
        self.ccli_number = root.get('CCLILicenseNumber')
        for count, slide in enumerate(root.slides.RVDisplaySlide, start=1):
            if not hasattr(slide.displayElements, 'RVTextElement'):
                log.debug('No text found, may be an image slide')
                continue
            rtf_data = slide.displayElements.RVTextElement.get('RTFData')
            rtf = base64.standard_b64decode(rtf_data)
            words, _ = strip_rtf(rtf.decode())
            self.add_verse(words, "v{count}".format(count=count))
    elif 500 <= version < 600:
        # ProPresenter 5
        self.copyright = root.get('CCLICopyrightInfo')
        self.ccli_number = root.get('CCLILicenseNumber')
        # The slide counter runs across all groups.
        count = 0
        for group in root.groups.RVSlideGrouping:
            for slide in group.slides.RVDisplaySlide:
                count += 1
                if not hasattr(slide.displayElements, 'RVTextElement'):
                    log.debug('No text found, may be an image slide')
                    continue
                rtf_data = slide.displayElements.RVTextElement.get('RTFData')
                rtf = base64.standard_b64decode(rtf_data)
                words, _ = strip_rtf(rtf.decode())
                self.add_verse(words, "v{count:d}".format(count=count))
    elif 600 <= version < 700:
        # ProPresenter 6
        self.copyright = root.get('CCLICopyrightYear')
        self.ccli_number = root.get('CCLISongNumber')
        count = 0
        for group in root.array.RVSlideGrouping:
            for slide in group.array.RVDisplaySlide:
                count += 1
                for item in slide.array:
                    if item.get('rvXMLIvarName') != "displayElements":
                        continue
                    if not hasattr(item, 'RVTextElement'):
                        log.debug('No text found, may be an image slide')
                        continue
                    for contents in item.RVTextElement.NSString:
                        data = base64.standard_b64decode(contents.text)
                        words = None
                        if contents.get('rvXMLIvarName') == "RTFData":
                            # Stop at the first RTF payload of this text element.
                            words, _ = strip_rtf(data.decode())
                            break
                    if words:
                        self.add_verse(words, "v{count:d}".format(count=count))
    if not self.finish():
        self.log_error(self.import_source)
def parse(self, data, cell=False):
    """
    Process the records

    :param data: The text to be processed: ``#name: value`` pairs inside a group marked as ``[]``.
        Whitespace after the closing bracket is tolerated.
    :param cell: False for the main group. When parsing a nested ``CELL...`` group this is the part of the
        name after ``CELL``; a truthy value makes the pairs be handled as verse data.
    :return: False if the data is malformed or the RTF of a verse cannot be stripped, otherwise True.
    """
    # Compare the last non-whitespace character so a trailing newline does not make the data count as
    # malformed.
    if len(data) == 0 or data[0:1] != '[' or data.strip()[-1:] != ']':
        self.log_error('File is malformed')
        return False
    i = 1
    verse_type = VerseType.tags[VerseType.Verse]
    while i < len(data):
        # Data is held as #name: value pairs inside groups marked as [].
        # Now we are looking for the name.
        if data[i:i + 1] == '#':
            name_end = data.find(':', i + 1)
            name = data[i + 1:name_end].upper()
            i = name_end + 1
            while data[i:i + 1] == ' ':
                i += 1
            if data[i:i + 1] == '"':
                # Quoted value: everything up to the next quote.
                end = data.find('"', i + 1)
                value = data[i + 1:end]
            elif data[i:i + 1] == '[':
                # Nested group: runs to the first closing bracket that is not inside quotes.
                j = i
                inside_quotes = False
                while j < len(data):
                    char = data[j:j + 1]
                    if char == '"':
                        inside_quotes = not inside_quotes
                    elif not inside_quotes and char == ']':
                        end = j + 1
                        break
                    j += 1
                value = data[i:end]
            else:
                # Bare value: ends at the next comma, or at the closing parenthesis if one is opened first.
                end = data.find(',', i + 1)
                if data.find('(', i, end) != -1:
                    end = data.find(')', i) + 1
                value = data[i:end]
            # If we are in the main group.
            if not cell:
                if name == 'TITLE':
                    self.title = self.decode(self.unescape(value))
                elif name == 'AUTHOR':
                    author = self.decode(self.unescape(value))
                    if len(author):
                        self.add_author(author)
                elif name == 'COPYRIGHT':
                    self.copyright = self.decode(self.unescape(value))
                elif name[0:4] == 'CELL':
                    self.parse(value, cell=name[4:])
            # We are in a verse group.
            else:
                if name == 'MARKER_NAME':
                    value = value.strip()
                    if len(value):
                        verse_type = VerseType.tags[VerseType.from_loose_input(value[0])]
                        if len(value) >= 2 and value[-1] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']:
                            verse_type = "%s%s" % (verse_type, value[-1])
                elif name == 'HOTKEY':
                    # HOTKEY always appears after MARKER_NAME, so it
                    # effectively overrides MARKER_NAME, if present.
                    if len(value) and value in HOTKEY_TO_VERSE_TYPE:
                        verse_type = HOTKEY_TO_VERSE_TYPE[value]
                if name == 'RTF':
                    value = self.unescape(value)
                    result = strip_rtf(value, self.encoding)
                    if result is None:
                        # Report failure with False, like the malformed-data exit, instead of a bare None.
                        return False
                    verse, self.encoding = result
                    # If any line inside any verse contains CCLI or
                    # only Public Domain, we treat this as special data:
                    # we remove that line and add data to specific field.
                    processed_lines = []
                    # Iterate the lines directly so the outer position counter ``i`` is not reused here.
                    for raw_line in verse.strip().split('\n'):
                        line = raw_line.strip()
                        if line[:3].lower() == 'ccl':
                            m = re.search(r'[0-9]+', line)
                            if m:
                                self.ccli_number = int(m.group(0))
                                continue
                        elif line.lower() == 'public domain':
                            self.copyright = 'Public Domain'
                            continue
                        processed_lines.append(line)
                    self.add_verse('\n'.join(processed_lines).strip(), verse_type)
            if end == -1:
                break
            i = end + 1
        i += 1
    return True