def test_normalize_str_replaces_crlf_with_lf(self):
    """
    Check that normalize_str converts a CRLF line ending into a plain LF.
    """
    # GIVEN: text whose line break is a carriage return + line feed pair
    crlf_text = 'something\r\nelse'

    # WHEN: the text is run through normalize_str
    result = normalize_str(crlf_text)

    # THEN: only the line feed remains
    assert result == 'something\nelse'
def test_normalize_str_removes_null_byte(self):
    """
    Check that normalize_str drops an embedded null byte.
    """
    # GIVEN: text with a null byte in the middle of a word
    text_with_null = 'somet\x00hing'

    # WHEN: the text is run through normalize_str
    result = normalize_str(text_with_null)

    # THEN: the null byte is gone and the rest is intact
    assert result == 'something'
def test_normalize_str_leaves_newlines(self):
    """
    Check that normalize_str keeps plain LF newlines untouched.
    """
    # GIVEN: text that only contains an LF newline
    lf_text = 'something\nelse'

    # WHEN: the text is run through normalize_str
    result = normalize_str(lf_text)

    # THEN: the text comes back unchanged
    assert result == lf_text
def process_songs_text(self, text):
    """
    Import every song contained in a block of text.

    The text is normalised and cut at form feed characters. Each non-blank piece is handed to
    ``process_song_text``; whenever the importer reports a complete song it is finished and the
    importer is reset before the next piece is processed.

    :param text: The text.
    """
    pieces = normalize_str(text).split('\f')
    self.set_defaults()
    for piece in pieces:
        stripped = piece.strip()
        if not stripped:
            # Nothing but whitespace between two form feeds.
            continue
        self.process_song_text(stripped)
        if self.check_complete():
            self.finish()
            self.set_defaults()
    # Flush whatever is still pending after the last piece.
    if self.check_complete():
        self.finish()
def do_import_file(self, file):
    """
    Process the OpenSong file - pass in a file-like object, not a file path.

    The XML is parsed with ``objectify.parse``. Metadata children of the ``song`` root are copied onto
    this importer (either by setting an attribute or by calling a handler method), topics are collected
    from ``theme`` and ``alttheme``, the ``lyrics`` text is parsed line by line into verses, and the
    ``presentation`` element (when present) is turned into ``self.verse_order_list``. The song is then
    committed through ``self.finish()``.

    Errors are reported through ``self.log_error`` using ``file.name``; nothing is returned.

    :param file: The file-like object to parse. Its ``name`` attribute is used in error reports.
    """
    self.set_defaults()
    try:
        tree = objectify.parse(file)
    except (Error, LxmlError):
        self.log_error(file.name, SongStrings.XMLSyntaxError)
        log.exception('Error parsing XML')
        return
    root = tree.getroot()
    if root.tag != 'song':
        self.log_error(file.name, str(
            translate('SongsPlugin.OpenSongImport', 'Invalid OpenSong song file. Missing song tag.')))
        return
    fields = dir(root)
    # Maps an XML child name to either the name of the attribute to set on this importer (str)
    # or a callable that is invoked with the child's text.
    decode = {
        'copyright': self.add_copyright,
        'ccli': 'ccli_number',
        'author': self.parse_author,
        'title': 'title',
        'aka': 'alternate_title',
        'hymn_number': self.parse_song_book_name_and_number,
        'user1': self.add_comment,
        'user2': self.add_comment,
        'user3': self.add_comment
    }
    for attr, fn_or_string in list(decode.items()):
        if attr in fields:
            ustring = str(root.__getattr__(attr))
            if isinstance(fn_or_string, str):
                if attr in ['ccli']:
                    # Keep only the digits; an entry without any digit is stored as None.
                    ustring = ''.join(re.findall(r'\d+', ustring))
                    if ustring:
                        setattr(self, fn_or_string, int(ustring))
                    else:
                        setattr(self, fn_or_string, None)
                else:
                    setattr(self, fn_or_string, ustring)
            else:
                fn_or_string(ustring)
    # Themes look like "God: Awe/Wonder", but we just want
    # "Awe" and "Wonder".  We use a set to ensure each topic
    # is only added once, in case it is already there, which
    # is actually quite likely if the alttheme is set
    topics = set(self.topics)
    if 'theme' in fields:
        theme = str(root.theme)
        # find() returns -1 when there is no ':', so the slice then starts at 0 (whole string).
        subthemes = theme[theme.find(':') + 1:].split('/')
        for topic in subthemes:
            topics.add(topic.strip())
    if 'alttheme' in fields:
        theme = str(root.alttheme)
        subthemes = theme[theme.find(':') + 1:].split('/')
        for topic in subthemes:
            topics.add(topic.strip())
    self.topics = list(topics)
    self.topics.sort()
    # data storage while importing
    # Layout: verses[verse_tag][verse_num][inst] -> list of lyric lines
    verses = {}
    # keep track of verses appearance order
    our_verse_order = []
    # default verse
    verse_tag = VerseType.tags[VerseType.Verse]
    verse_num = '1'
    # for the case where song has several sections with same marker
    inst = 1
    if 'lyrics' in fields:
        lyrics = str(root.lyrics)
    else:
        lyrics = ''
    # (column, chord) pairs from the most recent chord line; they stay in effect until the next chord line.
    chords = []
    for this_line in lyrics.split('\n'):
        if not this_line.strip():
            continue
        # skip this line if it is a comment
        if this_line.startswith(';'):
            continue
        # skip page and column breaks
        if this_line.startswith('---') or this_line.startswith('-!!'):
            continue
        # guitar chords marker
        if this_line.startswith('.'):
            # Find the position of the chords so they can be inserted in the lyrics
            chords = []
            this_line = this_line[1:]
            chord = ''
            i = 0
            while i < len(this_line):
                if this_line[i] != ' ':
                    # Start of a chord: remember its column and consume up to the next space.
                    chord_pos = i
                    chord += this_line[i]
                    i += 1
                    while i < len(this_line) and this_line[i] != ' ':
                        chord += this_line[i]
                        i += 1
                    chords.append((chord_pos, chord))
                    chord = ''
                i += 1
            continue
        # verse/chorus/etc. marker
        if this_line.startswith('['):
            # drop the square brackets
            right_bracket = this_line.find(']')
            content = this_line[1:right_bracket].lower()
            # have we got any digits? If so, verse number is everything from the digits to the end (openlp does not
            # have concept of part verses, so just ignore any non integers on the end (including floats))
            match = re.match(r'(\D*)(\d+)', content)
            if match is not None:
                verse_tag = match.group(1)
                verse_num = match.group(2)
            else:
                # otherwise we assume number 1 and take the whole prefix as the verse tag
                verse_tag = content
                verse_num = '1'
            # An empty tag (marker made only of digits) falls back to index 0 of VerseType.tags.
            verse_index = VerseType.from_loose_input(verse_tag) if verse_tag else 0
            verse_tag = VerseType.tags[verse_index]
            inst = 1
            # A marker that was already seen starts a new instance instead of appending to the old one.
            if [verse_tag, verse_num, inst] in our_verse_order and verse_num in verses.get(verse_tag, {}):
                inst = len(verses[verse_tag][verse_num]) + 1
            continue
        # number at start of line.. it's verse number
        if this_line[0].isdigit():
            # Only the first character is taken as the verse number.
            verse_num = this_line[0]
            this_line = this_line[1:]
        verses.setdefault(verse_tag, {})
        verses[verse_tag].setdefault(verse_num, {})
        if inst not in verses[verse_tag][verse_num]:
            verses[verse_tag][verse_num][inst] = []
            our_verse_order.append([verse_tag, verse_num, inst])
        # If chords exists insert them
        if chords and Settings().value('songs/enable chords') and not Settings().value(
                'songs/disable chords import'):
            # Each inserted "[chord]" lengthens the line, so later columns are shifted by the running offset.
            offset = 0
            for (column, chord) in chords:
                this_line = '{pre}[{chord}]{post}'.format(pre=this_line[:offset + column], chord=chord,
                                                          post=this_line[offset + column:])
                offset += len(chord) + 2
        # Tidy text and remove the ____s from extended words
        this_line = normalize_str(this_line)
        this_line = this_line.replace('_', '')
        this_line = this_line.replace('||', '\n[---]\n')
        this_line = this_line.strip()
        # If the line consists solely of a '|', then just use the implicit newline
        # Otherwise, add a newline for each '|'
        if this_line == '|':
            this_line = ''
        else:
            this_line = this_line.replace('|', '\n')
        verses[verse_tag][verse_num][inst].append(this_line)
    # done parsing
    # add verses in original order
    # verse_joints maps a verse definition such as "v1" to its text; several instances of the same
    # definition are joined with a "[---]" line between them.
    verse_joints = {}
    for (verse_tag, verse_num, inst) in our_verse_order:
        lines = '\n'.join(verses[verse_tag][verse_num][inst])
        # Count the leading numeric characters of verse_num; only that prefix goes into the definition.
        length = 0
        while length < len(verse_num) and verse_num[length].isnumeric():
            length += 1
        verse_def = '{tag}{number}'.format(tag=verse_tag, number=verse_num[:length])
        verse_joints[verse_def] = '{verse}\n[---]\n{lines}'.format(verse=verse_joints[verse_def], lines=lines) \
            if verse_def in verse_joints else lines
    # Parsing the dictionary produces the elements in a non-intuitive order.  While it "works", it's not a
    # natural layout should the user come back to edit the song.  Instead we sort by the verse type, so that we
    # get all the verses in order (v1, v2, ...), then the chorus(es), bridge(s), pre-chorus(es) etc.  We use a
    # tuple for the key, since tuples naturally sort in this manner.
    verse_defs = sorted(verse_joints.keys(),
                        key=lambda verse_def: (VerseType.from_tag(verse_def[0]), int(verse_def[1:])))
    for verse_def in verse_defs:
        lines = verse_joints[verse_def]
        self.add_verse(lines, verse_def)
    if not self.verses:
        # No lyrics were found: add a single empty verse.
        self.add_verse('')
    # figure out the presentation order, if present
    if 'presentation' in fields and root.presentation:
        order = str(root.presentation)
        # We make all the tags in the lyrics lower case, so match that here and then split into a list on the
        # whitespace.
        order = order.lower().split()
        for verse_def in order:
            match = re.match(r'(\D*)(\d+.*)', verse_def)
            if match is not None:
                verse_tag = match.group(1)
                verse_num = match.group(2)
                if not verse_tag:
                    verse_tag = VerseType.tags[VerseType.Verse]
            else:
                # Assume it's no.1 if there are no digits
                verse_tag = verse_def
                verse_num = '1'
            verse_index = VerseType.from_loose_input(verse_tag)
            verse_tag = VerseType.tags[verse_index]
            verse_def = '{tag}{number}'.format(tag=verse_tag, number=verse_num)
            # Only keep order entries that refer to a verse that was actually parsed from the lyrics.
            if verse_num in verses.get(verse_tag, {}):
                self.verse_order_list.append(verse_def)
            else:
                log.info('Got order {order} but not in verse tags, dropping this item from presentation '
                         'order'.format(order=verse_def))
    if not self.finish():
        self.log_error(file.name)
def _parse_and_add_lyrics(self, song):
    """
    Process the song lyrics

    The text of ``song.Contents`` is scanned twice. The first pass counts region markers
    (``[region N]``) and other bracketed section markers, which decides how blank lines and regions are
    interpreted. The second pass groups the lyric lines by region, verse type, verse number and
    instance. The collected verses are then added with ``self.add_verse`` in the order they first
    appeared, and ``song.Sequence`` (when present) is translated into ``self.verse_order_list``.

    ``self._success`` is set to ``False`` when ``Contents`` is missing or either field raises
    ``UnicodeDecodeError``.

    :param song: The song details
    """
    try:
        lyrics = str(song.Contents).strip()
    except UnicodeDecodeError:
        log.exception('Unicode decode error while decoding Contents')
        self._success = False
        return
    except AttributeError:
        log.exception('no Contents')
        self._success = False
        return
    lines = lyrics.split('\n')
    # we go over all lines first, to determine information,
    # which tells us how to parse verses later
    region_lines = {}
    separator_lines = 0
    for line in lines:
        line = line.strip()
        if not line:
            continue
        elif line[1:7] == 'region':
            # this is region separator, probably [region 2]
            region = self._extract_region(line)
            region_lines[region] = 1 + region_lines.get(region, 0)
        elif line[0] == '[':
            separator_lines += 1
    # if the song has separators
    separators = (separator_lines > 0)
    # the number of different regions in song - 1
    if len(region_lines) > 1:
        log.info(
            'EasySlidesImport: the file contained a song named "{title}"'
            'with more than two regions, but only two regions are tested, '
            'encountered regions were: {keys}'.format(
                title=self.title, keys=','.join(region_lines)))
    # if the song has regions
    regions = (len(region_lines) > 0)
    # if the regions are inside verses (the first region marker seen occurs more than once)
    regions_in_verses = (regions and region_lines[list(region_lines.keys())[0]] > 1)
    MarkTypes = {
        'CHORUS': VerseType.tags[VerseType.Chorus],
        'VERSE': VerseType.tags[VerseType.Verse],
        'INTRO': VerseType.tags[VerseType.Intro],
        'ENDING': VerseType.tags[VerseType.Ending],
        'BRIDGE': VerseType.tags[VerseType.Bridge],
        'PRECHORUS': VerseType.tags[VerseType.PreChorus]
    }
    # Layout: verses[region][verse type][verse number][instance] -> list of lyric lines
    verses = {}
    # list as [region, versetype, versenum, instance]
    our_verse_order = []
    default_region = '1'
    reg = default_region
    verses[reg] = {}
    # instance differentiates occurrences of same verse tag
    vt = 'v'
    vn = '1'
    inst = 1
    for line in lines:
        line = line.strip()
        if not line:
            if separators:
                # separators are used, so empty line means slide break
                # inside verse
                if self._list_has(verses, [reg, vt, vn, inst]):
                    inst += 1
            else:
                # separators are not used, so empty line starts a new verse
                vt = 'v'
                vn = len(verses[reg].get(vt, {})) + 1
                inst = 1
        elif line[0:7] == '[region':
            reg = self._extract_region(line)
            verses.setdefault(reg, {})
            if not regions_in_verses:
                vt = 'v'
                vn = '1'
                inst = 1
        elif line[0] == '[':
            # this is a normal section marker
            marker = line[1:line.find(']')].upper()
            vn = '1'
            # have we got any digits?
            # If so, versenumber is everything from the digits to the end.
            # The prefix must be matched with \D* rather than a greedy .*: a greedy prefix swallows all
            # but the last digit, so "VERSE 12" would be read as marker "VERSE 1" with number "2".
            match = re.match(r'(\D*)(\d+.*)', marker)
            if match:
                marker = match.group(1).strip()
                vn = match.group(2)
            # Unknown marker names become 'o'; a marker consisting only of a number is a plain verse.
            vt = MarkTypes.get(marker, 'o') if marker else 'v'
            if regions_in_verses:
                # NOTE(review): this assigns the local ``region``, which is not read afterwards;
                # possibly ``reg`` was intended. Left unchanged pending confirmation.
                region = default_region
            inst = 1
            if self._list_has(verses, [reg, vt, vn, inst]):
                inst = len(verses[reg][vt][vn]) + 1
        else:
            # A lyric line: file it under the current region / type / number / instance.
            if [reg, vt, vn, inst] not in our_verse_order:
                our_verse_order.append([reg, vt, vn, inst])
            verses[reg].setdefault(vt, {})
            verses[reg][vt].setdefault(vn, {})
            verses[reg][vt][vn].setdefault(inst, [])
            verses[reg][vt][vn][inst].append(normalize_str(line))
    # done parsing
    versetags = []
    # we use our_verse_order to ensure, we insert lyrics in the same order
    # as these appeared originally in the file
    for [reg, vt, vn, inst] in our_verse_order:
        if self._list_has(verses, [reg, vt, vn, inst]):
            # this is false, but needs user input
            versetag = '{tag}{number}'.format(tag=vt, number=vn)
            versetags.append(versetag)
            lines = '\n'.join(verses[reg][vt][vn][inst])
            self.add_verse(lines, versetag)
    # Single-letter Sequence codes and the verse tag each one stands for.
    SeqTypes = {
        'p': 'p1',
        'q': 'p2',
        'c': 'c1',
        't': 'c2',
        'b': 'b1',
        'w': 'b2',
        'e': 'e1'
    }
    # Make use of Sequence data, determining the order of verses
    try:
        order = str(song.Sequence).strip().split(',')
        for tag in order:
            if not tag:
                continue
            elif tag[0].isdigit():
                tag = 'v' + tag
            elif tag.lower() in SeqTypes:
                tag = SeqTypes[tag.lower()]
            else:
                continue
            if tag in versetags:
                self.verse_order_list.append(tag)
            else:
                log.info(
                    'Got order item {tag}, which is not in versetags, dropping item from presentation '
                    'order'.format(tag=tag))
    except UnicodeDecodeError:
        log.exception('Unicode decode error while decoding Sequence')
        self._success = False
    except AttributeError:
        # The song has no Sequence attribute: leave the verse order list as it is.
        pass
def finish(self):
    """
    All fields have been set to this song. Write the song to disk.

    An incomplete song (``check_complete()`` is false) is discarded: the importer is reset and
    ``False`` is returned. Otherwise a ``Song`` is built from the collected fields, saved through
    ``self.manager``, its media files are attached, the importer is reset and ``True`` is returned.

    :return: ``True`` if the song was saved, ``False`` if it was incomplete.
    """
    if not self.check_complete():
        self.set_defaults()
        return False
    log.info('committing song {title} to database'.format(title=self.title))
    song = Song()
    song.title = self.title
    if self.import_wizard is not None:
        progress_text = WizardStrings.ImportingType.format(source=song.title)
        self.import_wizard.increment_progress_bar(progress_text)
    song.alternate_title = self.alternate_title
    # Values will be set when cleaning the song.
    song.search_title = ''
    song.search_lyrics = ''
    song.verse_order = ''
    song.song_number = self.song_number
    # Original verse definition -> replacement definition, for verses whose type is not a known tag.
    renamed_defs = {}
    sxml = SongXML()
    other_count = 1
    for verse_def, verse_text, lang in self.verses:
        verse_tag = verse_def[0].lower()
        if verse_tag not in VerseType.tags:
            # Unknown verse type: file it as an "other" verse with a running number.
            verse_tag = VerseType.tags[VerseType.Other]
            replacement = '{tag}{count:d}'.format(tag=VerseType.tags[VerseType.Other], count=other_count)
            renamed_defs[verse_def] = replacement
            other_count += 1
            log.info('Versetype {old} changing to {new}'.format(old=verse_def, new=replacement))
            verse_def = replacement
        sxml.add_verse_to_lyrics(verse_tag, verse_def[1:], normalize_str(verse_text), lang)
    song.lyrics = str(sxml.extract_xml(), 'utf-8')
    if not self.verse_order_list and self.verse_order_list_generated_useful:
        self.verse_order_list = self.verse_order_list_generated
    # Apply the renames from above to the verse order as well.
    self.verse_order_list = [renamed_defs.get(entry, entry) for entry in self.verse_order_list]
    song.verse_order = ' '.join(self.verse_order_list)
    song.copyright = self.copyright
    song.comments = self.comments
    song.theme_name = self.theme_name
    song.ccli_number = self.ccli_number
    for author_text, author_type in self.authors:
        author = self.manager.get_object_filtered(Author, Author.display_name == author_text)
        if not author:
            # Last space-separated word is the last name, everything before it the first name.
            name_parts = author_text.split(' ')
            author = Author.populate(display_name=author_text,
                                     last_name=name_parts[-1],
                                     first_name=' '.join(name_parts[:-1]))
        song.add_author(author, author_type)
    if self.song_book_name:
        song_book = self.manager.get_object_filtered(Book, Book.name == self.song_book_name)
        if song_book is None:
            song_book = Book.populate(name=self.song_book_name, publisher=self.song_book_pub)
        song.add_songbook_entry(song_book, song.song_number)
    for topic_text in self.topics:
        if topic_text:
            topic = self.manager.get_object_filtered(Topic, Topic.name == topic_text)
            if topic is None:
                topic = Topic.populate(name=topic_text)
            song.topics.append(topic)
    # The song has to be saved before the media files are handled, because
    # copying a media file needs the id of the saved song.
    clean_song(self.manager, song)
    self.manager.save_object(song)
    # Attach the media files (copying them where needed) and save the song once more.
    for file_path, weight in self.media_files:
        media_file = self.manager.get_object_filtered(MediaFile, MediaFile.file_path == file_path)
        if media_file:
            continue
        if file_path.parent:
            file_path = self.copy_media_file(song.id, file_path)
        song.media_files.append(MediaFile.populate(file_path=file_path, weight=weight))
    self.manager.save_object(song)
    self.set_defaults()
    return True