def do_import(self):
    """
    Import every SongBeamer file listed in ``self.import_source``.

    Nothing is done unless ``self.import_source`` is a list. For each entry that is an existing file, the file is
    read with the detected encoding, ``#`` header lines found before the first verse separator are handed to
    ``self.parseTags`` and the text between ``--``/``---`` separators is added as verses. Entries that are not
    files are skipped. A file for which ``self.finish()`` returns a false value is reported through
    ``self.log_error``. The import stops as soon as ``self.stop_import_flag`` is set.
    """
    if not isinstance(self.import_source, list):
        return
    self.import_wizard.progress_bar.setMaximum(len(self.import_source))
    for import_file in self.import_source:
        # TODO: check that it is a valid SongBeamer file
        if self.stop_import_flag:
            return
        self.set_defaults()
        self.current_verse = ''
        self.current_verse_type = VerseType.tags[VerseType.Verse]
        if not os.path.isfile(import_file):
            continue
        file_name = os.path.split(import_file)[1]
        # Detect the encoding. The detection result may be missing, or carry no encoding at all, in which case the
        # cp1252 default below is used instead of crashing.
        detected = get_file_encoding(import_file)
        encoding = (detected or {}).get('encoding') or ''
        # The encoding should only be ANSI (cp1252), UTF-8, Unicode, Big-Endian-Unicode.
        # So if it doesn't start with 'u' we default to cp1252. See:
        # https://forum.songbeamer.com/viewtopic.php?p=419&sid=ca4814924e37c11e4438b7272a98b6f2
        if not encoding.lower().startswith('u'):
            encoding = 'cp1252'
        self.input_file_encoding = encoding
        # Use a context manager so the file handle is closed even if reading fails.
        with open(import_file, 'rt', encoding=self.input_file_encoding) as infile:
            song_data = infile.readlines()
        self.title = file_name.split('.sng')[0]
        read_verses = False
        for line in song_data:
            line = line.strip()
            if line.startswith('#') and not read_verses:
                self.parseTags(line)
            elif line.startswith('--'):
                # --- and -- allowed for page-breaks (difference in Songbeamer only in printout)
                if self.current_verse:
                    self.replace_html_tags()
                    self.add_verse(self.current_verse, self.current_verse_type)
                    self.current_verse = ''
                    self.current_verse_type = VerseType.tags[VerseType.Verse]
                read_verses = True
                verse_start = True
            elif read_verses:
                if verse_start:
                    verse_start = False
                    # The first line of a verse is only kept as text when it is not a verse mark.
                    if not self.check_verse_marks(line):
                        self.current_verse = line + '\n'
                else:
                    self.current_verse += line + '\n'
        # Flush the verse that was still being collected when the file ended.
        if self.current_verse:
            self.replace_html_tags()
            self.add_verse(self.current_verse, self.current_verse_type)
        if not self.finish():
            self.log_error(import_file)
def do_import(self):
    """
    Import every PresentationManager song file listed in ``self.import_source``.

    Each file is parsed as XML with a recovering parser. If that raises ``etree.XMLSyntaxError`` the file is read
    again as text using the detected encoding, everything up to the end of the XML declaration line is removed and
    parsing is retried. Files that cannot be turned into XML, or that make ``self.process_song`` raise
    ``AttributeError``, are reported through ``self.log_error`` and the import carries on with the next file.
    The import stops as soon as ``self.stop_import_flag`` is set.
    """
    self.import_wizard.progress_bar.setMaximum(len(self.import_source))
    for file_path in self.import_source:
        if self.stop_import_flag:
            return
        self.import_wizard.increment_progress_bar(WizardStrings.ImportingType.format(source=file_path.name))
        try:
            tree = etree.parse(str(file_path), parser=etree.XMLParser(recover=True))
        except etree.XMLSyntaxError:
            # Try to detect encoding and use it
            encoding = get_file_encoding(file_path)
            # Open file with detected encoding and remove encoding declaration
            text = file_path.read_text(encoding=encoding)
            text = re.sub(r'.+\?>\n', '', text)
            try:
                tree = etree.fromstring(text, parser=etree.XMLParser(recover=True))
            except ValueError:
                log.exception('XML syntax error in file {name}'.format(name=file_path))
                self.log_error(
                    file_path,
                    translate('SongsPlugin.PresentationManagerImport',
                              'File is not in XML-format, which is the only format supported.'))
                continue
        file_str = etree.tostring(tree)
        if not file_str:
            # No exception is being handled here, so log a plain error: log.exception() is only meant to be called
            # from an exception handler and would attach an empty traceback.
            log.error('Could not find XML in file {name}'.format(name=file_path))
            self.log_error(
                file_path,
                translate('SongsPlugin.PresentationManagerImport',
                          'File is not in XML-format, which is the only format supported.'))
            continue
        root = objectify.fromstring(file_str)
        try:
            self.process_song(root, file_path)
        except AttributeError:
            log.exception('XML syntax error in file {name}'.format(name=file_path))
            self.log_error(
                file_path,
                translate('SongsPlugin.PresentationManagerImport',
                          'File is not a valid PresentationManager XMl file.'))
def parse_csv_file(filename, results_tuple):
    """
    Parse the supplied CSV file.

    :param filename: The name of the file to parse. Str
    :param results_tuple: The namedtuple to use to store the results. namedtuple
    :return: A list of namedtuples of type results_tuple, one for each row of the file
    :raises ValidationError: If the encoding cannot be detected, the file cannot be read or the CSV is malformed
    """
    try:
        detected = get_file_encoding(filename)
        if detected is None:
            # get_file_encoding may hand back None instead of raising when the file could not be read; turn that
            # into the same failure as any other read error rather than a TypeError on the subscript below.
            raise OSError('Could not detect the encoding of "{file}"'.format(file=filename))
        with open(filename, 'r', encoding=detected['encoding'], newline='') as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',', quotechar='"')
            return [results_tuple(*line) for line in csv_reader]
    except (OSError, csv.Error) as err:
        # Chain the original error so the underlying cause stays visible in tracebacks.
        raise ValidationError(msg='Parsing "{file}" failed'.format(file=filename)) from err
def test_get_file_name_encoding_oserror_test(self):
    """
    Test get_file_encoding when opening the file raises an OSError
    """
    # GIVEN: A mocked UniversalDetector, an open() that raises an OSError and a mocked log
    with patch('openlp.core.common.UniversalDetector'):
        with patch('builtins.open', side_effect=OSError):
            with patch('openlp.core.common.log') as log_mock:
                # WHEN: Calling get_file_encoding
                detected = get_file_encoding('file name')

    # THEN: log.exception should be called and get_file_encoding should return None
    log_mock.exception.assert_called_once_with('Error detecting file encoding')
    self.assertIsNone(detected)
def parse_csv_file(filename, results_tuple):
    """
    Parse the supplied CSV file.

    :param filename: The name of the file to parse. Str
    :param results_tuple: The namedtuple to use to store the results. namedtuple
    :return: A list of namedtuples of type results_tuple, one for each row of the file
    :raises ValidationError: If the encoding cannot be detected, the file cannot be read or the CSV is malformed
    """
    try:
        detected = get_file_encoding(filename)
        if detected is None:
            # get_file_encoding may hand back None instead of raising when the file could not be read; turn that
            # into the same failure as any other read error rather than a TypeError on the subscript below.
            raise OSError('Could not detect the encoding of "{file}"'.format(file=filename))
        with open(filename, 'r', encoding=detected['encoding'], newline='') as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',', quotechar='"')
            return [results_tuple(*line) for line in csv_reader]
    except (OSError, csv.Error) as err:
        # Chain the original error so the underlying cause stays visible in tracebacks.
        raise ValidationError(msg='Parsing "{file}" failed'.format(
            file=filename)) from err
def parse_csv_file(file_path, results_tuple):
    """
    Read a CSV file and convert every row into a ``results_tuple``.

    :param openlp.core.common.path.Path file_path: The file to parse.
    :param namedtuple results_tuple: The namedtuple type each row is unpacked into.
    :return: A list with one ``results_tuple`` per row of the file
    :rtype: list[namedtuple]
    :raises ValidationError: If the file cannot be read or is not valid CSV
    """
    try:
        detected_encoding = get_file_encoding(file_path)['encoding']
        with file_path.open('r', encoding=detected_encoding, newline='') as handle:
            parsed_rows = []
            for row in csv.reader(handle, delimiter=',', quotechar='"'):
                parsed_rows.append(results_tuple(*row))
            return parsed_rows
    except (OSError, csv.Error):
        failure_message = 'Parsing "{file}" failed'.format(file=file_path)
        raise ValidationError(msg=failure_message)
def test_get_file_encoding_done(self):
    """
    Test get_file_encoding when the detector reports that it is done after the first chunk
    """
    # GIVEN: A mocked UniversalDetector instance whose done attribute becomes True after the first iteration, and
    #        a mocked Path.open that returns 1040 bytes of test data
    detection = {'encoding': 'UTF-8', 'confidence': 0.99}
    with patch('openlp.core.common.UniversalDetector') as detector_class:
        with patch.object(Path, 'open', return_value=BytesIO(b'data' * 260)) as open_mock:
            detector = MagicMock()
            detector.close.return_value = detection
            type(detector).done = PropertyMock(side_effect=[False, True])
            detector_class.return_value = detector

            # WHEN: Calling get_file_encoding
            result = get_file_encoding(Path('file name'))

    # THEN: The feed method of UniversalDetector should only be called once before returning a result
    open_mock.assert_called_once_with('rb')
    expected_feed_calls = [call(b'data' * 256)]
    assert detector.feed.mock_calls == expected_feed_calls
    detector.close.assert_called_once_with()
    assert result == 'UTF-8'
def test_get_file_name_encoding_done_test(self):
    """
    Test get_file_encoding when the detector reports that it is done after the first chunk
    """
    # GIVEN: A mocked UniversalDetector instance whose done attribute becomes True after the first iteration, and
    #        a mocked open that returns 1040 bytes of test data
    detection = {'encoding': 'UTF-8', 'confidence': 0.99}
    with patch('openlp.core.common.UniversalDetector') as detector_class:
        with patch('builtins.open', return_value=BytesIO(b"data" * 260)) as open_mock:
            detector = MagicMock(result=detection)
            type(detector).done = PropertyMock(side_effect=[False, True])
            detector_class.return_value = detector

            # WHEN: Calling get_file_encoding
            result = get_file_encoding('file name')

    # THEN: The feed method of UniversalDetector should only be called once before returning a result
    open_mock.assert_called_once_with('file name', 'rb')
    expected_feed_calls = [call(b"data" * 256)]
    self.assertEqual(detector.feed.mock_calls, expected_feed_calls)
    detector.close.assert_called_once_with()
    self.assertEqual(result, detection)
def test_get_file_encoding_eof(self):
    """
    Test get_file_encoding when the whole file is read before the detector is done
    """
    # GIVEN: A mocked UniversalDetector instance which isn't set to done and a mocked Path.open, with 1040 bytes of
    #        test data (enough to run the iterator twice)
    detection = {'encoding': 'UTF-8', 'confidence': 0.99}
    with patch('openlp.core.common.UniversalDetector') as detector_class:
        with patch.object(Path, 'open', return_value=BytesIO(b'data' * 260)) as open_mock:
            detector = MagicMock(mock=detector_class, done=False)
            detector.close.return_value = detection
            detector_class.return_value = detector

            # WHEN: Calling get_file_encoding
            result = get_file_encoding(Path('file name'))

    # THEN: The feed method of UniversalDetector should have been called twice before returning a result
    open_mock.assert_called_once_with('rb')
    expected_feed_calls = [call(b'data' * 256), call(b'data' * 4)]
    assert detector.feed.mock_calls == expected_feed_calls
    detector.close.assert_called_once_with()
    assert result == 'UTF-8'
def test_get_file_name_encoding_eof_test(self):
    """
    Test get_file_encoding when the whole file is read before the detector is done
    """
    # GIVEN: A mocked UniversalDetector instance which isn't set to done and a mocked open, with 1040 bytes of test
    #        data (enough to run the iterator twice)
    detection = {'encoding': 'UTF-8', 'confidence': 0.99}
    with patch('openlp.core.common.UniversalDetector') as detector_class:
        with patch('builtins.open', return_value=BytesIO(b"data" * 260)) as open_mock:
            detector = MagicMock(mock=detector_class, done=False, result=detection)
            detector_class.return_value = detector

            # WHEN: Calling get_file_encoding
            result = get_file_encoding('file name')

    # THEN: The feed method of UniversalDetector should have been called twice before returning a result
    open_mock.assert_called_once_with('file name', 'rb')
    expected_feed_calls = [call(b"data" * 256), call(b"data" * 4)]
    self.assertEqual(detector.feed.mock_calls, expected_feed_calls)
    detector.close.assert_called_once_with()
    self.assertEqual(result, detection)
def test_get_file_encoding_oserror(self):
    """
    Test get_file_encoding when opening the file raises an OSError
    """
    # GIVEN: A mocked UniversalDetector instance which isn't set to done, a Path.open that raises an OSError and a
    #        mocked log. Path.open is patched directly (rather than builtins.open) because the function is called
    #        with a Path, so the error no longer depends on whether a file called 'file name' exists on disk.
    with patch('openlp.core.common.UniversalDetector') as mocked_universal_detector, \
            patch.object(Path, 'open', side_effect=OSError), \
            patch('openlp.core.common.log') as mocked_log:
        encoding_result = {'encoding': 'UTF-8', 'confidence': 0.99}
        mocked_universal_detector_inst = MagicMock(mock=mocked_universal_detector,
                                                   **{'done': False, 'close.return_value': encoding_result})
        mocked_universal_detector.return_value = mocked_universal_detector_inst

        # WHEN: Calling get_file_encoding
        result = get_file_encoding(Path('file name'))

        # THEN: log.exception should be called, nothing should have been fed to the detector and the encoding
        #       reported by the detector's close() should be returned
        mocked_log.exception.assert_called_once_with('Error detecting file encoding')
        mocked_universal_detector_inst.feed.assert_not_called()
        mocked_universal_detector_inst.close.assert_called_once_with()
        assert result == 'UTF-8'
def do_import(self):
    """
    Receive a single file or a list of files to import.

    Nothing is done unless ``self.import_source`` is a list. Each entry that is a file is read with the detected
    encoding (cp1252 unless the detected encoding starts with 'u'), ``#`` lines found before the first verse
    separator are passed to ``self.parse_tags``, and the text between ``---`` separators is added as verses, with
    every text line passed through ``self.insert_chords``. Entries that are not files are skipped. A file for
    which ``self.finish()`` returns a false value is reported through ``self.log_error``.
    """
    if not isinstance(self.import_source, list):
        return
    self.import_wizard.progress_bar.setMaximum(len(self.import_source))
    for file_path in self.import_source:
        # TODO: check that it is a valid SongBeamer file
        if self.stop_import_flag:
            return
        # Reset the per-song state before reading the next file
        self.set_defaults()
        self.current_verse = ''
        self.current_verse_type = VerseType.tags[VerseType.Verse]
        self.chord_table = None
        if file_path.is_file():
            # Detect the encoding
            self.input_file_encoding = get_file_encoding(
                file_path)['encoding']
            # The encoding should only be ANSI (cp1252), UTF-8, Unicode, Big-Endian-Unicode.
            # So if it doesn't start with 'u' we default to cp1252. See:
            # https://forum.songbeamer.com/viewtopic.php?p=419&sid=ca4814924e37c11e4438b7272a98b6f2
            if not self.input_file_encoding.lower().startswith('u'):
                self.input_file_encoding = 'cp1252'
            with file_path.open(
                    encoding=self.input_file_encoding) as song_file:
                song_data = song_file.readlines()
        else:
            continue
        self.title = file_path.stem
        read_verses = False
        # The first verse separator doesn't count, but the others does, so line count starts at -1
        line_number = -1
        verse_tags_mode = VerseTagMode.Unknown
        first_verse = True
        # An index based loop is used (instead of iterating the lines) because the parsing may have to be restarted
        # from the first line, which is done by resetting idx to -1 further down.
        idx = -1
        while idx + 1 < len(song_data):
            idx = idx + 1
            line = song_data[idx].rstrip()
            stripped_line = line.strip()
            if line.startswith('#') and not read_verses:
                self.parse_tags(line)
            elif stripped_line.startswith('---'):
                # '---' is a verse breaker
                if self.current_verse:
                    self.replace_html_tags()
                    self.add_verse(self.current_verse, self.current_verse_type)
                    self.current_verse = ''
                    self.current_verse_type = VerseType.tags[
                        VerseType.Verse]
                    first_verse = False
                read_verses = True
                verse_start = True
                # Songbeamer allows chord on line "-1", meaning the first line has only chords
                if line_number == -1:
                    first_line = self.insert_chords(line_number, '')
                    if first_line:
                        self.current_verse = first_line.strip() + '\n'
                line_number += 1
            elif stripped_line.startswith('--'):
                # '--' is a page breaker, we convert to optional page break
                self.current_verse += '[---]\n'
                line_number += 1
            elif read_verses:
                if verse_start:
                    # First line after a verse separator: it may be a verse mark rather than verse text
                    verse_start = False
                    verse_mark = self.check_verse_marks(line)
                    # To ensure that linenumbers are mapped correctly when inserting chords, we attempt to detect
                    # if verse tags are inserted manually or by SongBeamer. If they are inserted manually the lines
                    # should be counted, otherwise not. If all verses start with a tag we assume it is inserted by
                    # SongBeamer.
                    if first_verse and verse_tags_mode == VerseTagMode.Unknown:
                        if verse_mark:
                            verse_tags_mode = VerseTagMode.ContainsTags
                        else:
                            verse_tags_mode = VerseTagMode.ContainsNoTags
                    elif verse_tags_mode != VerseTagMode.ContainsNoTagsRestart:
                        if not verse_mark and verse_tags_mode == VerseTagMode.ContainsTags:
                            # A verse mark was expected but not found, which means that verse marks has not been
                            # inserted by songbeamer, but are manually added headings. So restart the loop, and
                            # count tags as lines.
                            self.set_defaults()
                            self.title = file_path.stem
                            verse_tags_mode = VerseTagMode.ContainsNoTagsRestart
                            read_verses = False
                            # The first verseseparator doesn't count, but the others does, so linecount starts at -1
                            line_number = -1
                            first_verse = True
                            idx = -1
                            continue
                    if not verse_mark:
                        line = self.insert_chords(line_number, line)
                        self.current_verse += line.strip() + '\n'
                        line_number += 1
                    elif verse_tags_mode in [
                            VerseTagMode.ContainsNoTags,
                            VerseTagMode.ContainsNoTagsRestart
                    ]:
                        # The verse mark line itself is not added as text, but in these modes it still counts as a
                        # line when mapping chords.
                        line_number += 1
                else:
                    line = self.insert_chords(line_number, line)
                    self.current_verse += line.strip() + '\n'
                    line_number += 1
        # Add the verse that was still being collected when the file ended
        if self.current_verse:
            self.replace_html_tags()
            self.add_verse(self.current_verse, self.current_verse_type)
        if not self.finish():
            self.log_error(file_path)
def do_import(self):
    """
    Import the songs contained in the WorshipAssistant CSV file ``self.import_source``.

    The file is read with its detected encoding and every record becomes one song: title, author, copyright, CCLI
    number and verse order are taken from the record, and the ``LYRICS2`` field is split into verses at lines
    starting with ``[``. Problems are reported through ``self.log_error``. The import stops as soon as
    ``self.stop_import_flag`` is set.
    """
    # Get encoding
    encoding = get_file_encoding(self.import_source)['encoding']
    with self.import_source.open('r', encoding=encoding) as songs_file:
        songs_reader = csv.DictReader(songs_file, escapechar='\\')
        try:
            records = list(songs_reader)
        except csv.Error as e:
            self.log_error(
                translate('SongsPlugin.WorshipAssistantImport', 'Error reading CSV file.'),
                translate('SongsPlugin.WorshipAssistantImport',
                          'Line {number:d}: {error}').format(number=songs_reader.line_num, error=e))
            return
    num_records = len(records)
    log.info('{count} records found in CSV file'.format(count=num_records))
    self.import_wizard.progress_bar.setMaximum(num_records)
    # Create regex to strip html tags
    re_html_strip = re.compile(r'<[^>]+>')
    for index, raw_record in enumerate(records, 1):
        if self.stop_import_flag:
            return
        # Ensure that all keys are uppercase
        record = {field.upper(): value for field, value in raw_record.items()}
        # The CSV file has a line in the middle of the file where the headers are repeated.
        # We need to skip this line.
        if all(record[column] == column for column in ('TITLE', 'AUTHOR', 'LYRICS2')):
            continue
        self.set_defaults()
        verse_order_list = []
        try:
            self.title = record['TITLE']
            if record['AUTHOR'] != EMPTY_STR:
                self.parse_author(record['AUTHOR'])
            if record['COPYRIGHT'] != EMPTY_STR:
                self.add_copyright(record['COPYRIGHT'])
            if record['CCLINR'] != EMPTY_STR:
                self.ccli_number = record['CCLINR']
            if record['ROADMAP'] != EMPTY_STR:
                verse_order_list = [entry.strip() for entry in record['ROADMAP'].split(',')]
            lyrics = record['LYRICS2']
        except UnicodeDecodeError as e:
            self.log_error(
                translate('SongsPlugin.WorshipAssistantImport', 'Record {count:d}').format(count=index),
                translate('SongsPlugin.WorshipAssistantImport', 'Decoding error: {error}').format(error=e))
            continue
        except TypeError as e:
            self.log_error(
                translate('SongsPlugin.WorshipAssistantImport', 'File not valid WorshipAssistant CSV format.'),
                'TypeError: {error}'.format(error=e))
            return
        verse = ''
        used_verses = []
        verse_id = VerseType.tags[VerseType.Verse] + '1'
        for line in lyrics.splitlines():
            if not line.startswith('['):
                # add line text to verse. Strip out html
                verse += re_html_strip.sub('', line) + '\n'
                continue
            # The line is a verse marker, so add the previous verse first
            if verse:
                # remove trailing linebreak, part of the WA syntax
                self.add_verse(verse[:-1], verse_id)
                used_verses.append(verse_id)
                verse = ''
            # drop the square brackets
            right_bracket = line.find(']')
            content = line[1:right_bracket].lower()
            marker_match = re.match(r'(\D*)(\d+)', content)
            if marker_match is None:
                # no number found, so we assume number 1 and take the whole prefix as the verse tag
                verse_tag, verse_num = content, '1'
            else:
                verse_tag, verse_num = marker_match.group(1), marker_match.group(2)
            if verse_tag:
                verse_index = VerseType.from_loose_input(verse_tag)
            else:
                verse_index = 0
            verse_tag = VerseType.tags[verse_index]
            # Update verse order when the verse name has changed
            verse_id = verse_tag + verse_num
            # Make sure we've not choosen an id already used
            while verse_id in verse_order_list and content in verse_order_list:
                verse_num = str(int(verse_num) + 1)
                verse_id = verse_tag + verse_num
            if content != verse_id:
                for position, entry in enumerate(verse_order_list):
                    if entry.lower() == content.lower():
                        verse_order_list[position] = verse_id
        if verse:
            # remove trailing linebreak, part of the WA syntax
            if verse.endswith('\n\n'):
                verse = verse[:-1]
            self.add_verse(verse, verse_id)
            used_verses.append(verse_id)
        if verse_order_list:
            # Use the verse order in the import, but remove entries that doesn't have a text
            self.verse_order_list = [entry for entry in verse_order_list if entry in used_verses]
        if not self.finish():
            title_suffix = ': "' + self.title + '"' if self.title else ''
            self.log_error(
                translate('SongsPlugin.WorshipAssistantImport', 'Record {count:d}').format(count=index) +
                title_suffix)