Python get_file_encoding 예제들, openlp.core.common.get_file_encoding Python 예제들

예제 #1

0

파일 보기

파일: songbeamer.py 프로젝트: imkernel/openlp

 def do_import(self):
     """
     Receive a single file or a list of files to import.

     Every entry of ``self.import_source`` that is an existing file is read with its detected encoding and
     parsed line by line: ``#`` lines in front of the first separator are passed to ``parseTags`` and lines
     starting with ``--`` separate the verses. Entries that are not files are skipped. The method returns
     early when ``self.import_source`` is not a list or when ``self.stop_import_flag`` is set.
     """
     if not isinstance(self.import_source, list):
         return
     self.import_wizard.progress_bar.setMaximum(len(self.import_source))
     for import_file in self.import_source:
         # TODO: check that it is a valid SongBeamer file
         if self.stop_import_flag:
             return
         self.set_defaults()
         self.current_verse = ''
         self.current_verse_type = VerseType.tags[VerseType.Verse]
         if not os.path.isfile(import_file):
             continue
         # Detect the encoding. The detection may yield nothing at all (or no 'encoding' value), in which case
         # an empty string is used so that the cp1252 fallback below applies.
         detected = get_file_encoding(import_file)
         self.input_file_encoding = (detected or {}).get('encoding') or ''
         # The encoding should only be ANSI (cp1252), UTF-8, Unicode, Big-Endian-Unicode.
         # So if it doesn't start with 'u' we default to cp1252. See:
         # https://forum.songbeamer.com/viewtopic.php?p=419&sid=ca4814924e37c11e4438b7272a98b6f2
         if not self.input_file_encoding.lower().startswith('u'):
             self.input_file_encoding = 'cp1252'
         # The context manager makes sure the file handle is released again.
         with open(import_file, 'rt', encoding=self.input_file_encoding) as infile:
             song_data = infile.readlines()
         file_name = os.path.split(import_file)[1]
         self.title = file_name.split('.sng')[0]
         read_verses = False
         # Only consulted once a separator has been seen; initialised so it is never unbound.
         verse_start = False
         for line in song_data:
             # Just make sure that the line is of the type 'Unicode'.
             line = str(line).strip()
             if line.startswith('#') and not read_verses:
                 self.parseTags(line)
             elif line.startswith('--'):
                 # --- and -- allowed for page-breaks (difference in Songbeamer only in printout)
                 if self.current_verse:
                     self.replace_html_tags()
                     self.add_verse(self.current_verse, self.current_verse_type)
                     self.current_verse = ''
                     self.current_verse_type = VerseType.tags[VerseType.Verse]
                 read_verses = True
                 verse_start = True
             elif read_verses:
                 if verse_start:
                     verse_start = False
                     # The first line after a separator may be a verse mark; it is only kept as verse text
                     # when check_verse_marks() does not accept it.
                     if not self.check_verse_marks(line):
                         self.current_verse = line + '\n'
                 else:
                     self.current_verse += line + '\n'
         # Flush the verse that was still being collected when the file ended.
         if self.current_verse:
             self.replace_html_tags()
             self.add_verse(self.current_verse, self.current_verse_type)
         if not self.finish():
             self.log_error(import_file)

예제 #2

0

파일 보기

파일: presentationmanager.py 프로젝트: ipic/projecao

 def do_import(self):
     """
     Import every file listed in ``self.import_source``.

     Each file is parsed as XML with a recovering parser. When that raises an ``XMLSyntaxError`` the file is
     read again as text using the detected encoding and parsed from the string. Files that yield no XML, or
     that make ``process_song`` raise an ``AttributeError``, are reported through ``log_error``. The import
     stops as soon as ``self.stop_import_flag`` is set.
     """
     self.import_wizard.progress_bar.setMaximum(len(self.import_source))
     for song_path in self.import_source:
         if self.stop_import_flag:
             return
         progress_text = WizardStrings.ImportingType.format(source=song_path.name)
         self.import_wizard.increment_progress_bar(progress_text)
         try:
             tree = etree.parse(str(song_path), parser=etree.XMLParser(recover=True))
         except etree.XMLSyntaxError:
             # Fall back to the detected encoding and drop the line(s) ending in '?>' (the XML declaration)
             detected_encoding = get_file_encoding(song_path)
             raw_text = song_path.read_text(encoding=detected_encoding)
             cleaned_text = re.sub(r'.+\?>\n', '', raw_text)
             try:
                 tree = etree.fromstring(cleaned_text, parser=etree.XMLParser(recover=True))
             except ValueError:
                 log.exception('XML syntax error in file {name}'.format(name=song_path))
                 self.log_error(
                     song_path,
                     translate('SongsPlugin.PresentationManagerImport',
                               'File is not in XML-format, which is the only format supported.'))
                 continue
         serialized = etree.tostring(tree)
         if not serialized:
             # Nothing could be serialised from the parsed tree, so there is nothing to import
             log.exception('Could not find XML in file {name}'.format(name=song_path))
             self.log_error(
                 song_path,
                 translate('SongsPlugin.PresentationManagerImport',
                           'File is not in XML-format, which is the only format supported.'))
             continue
         song_root = objectify.fromstring(serialized)
         try:
             self.process_song(song_root, song_path)
         except AttributeError:
             log.exception('XML syntax error in file {name}'.format(name=song_path))
             self.log_error(
                 song_path,
                 translate('SongsPlugin.PresentationManagerImport',
                           'File is not a valid PresentationManager XMl file.'))

예제 #3

0

파일 보기

파일: csvbible.py 프로젝트: imkernel/openlp

    def parse_csv_file(filename, results_tuple):
        """
        Parse the supplied CSV file.

        :param filename: The name of the file to parse. Str
        :param results_tuple: The namedtuple to use to store the results. namedtuple
        :return: A list of namedtuples of type results_tuple, one per CSV row
        :raises ValidationError: If the encoding cannot be detected, the file cannot be read or the CSV is invalid
        """
        try:
            detected = get_file_encoding(filename)
            if detected is None:
                # No detection result is available (e.g. the file could not be opened); report this as a
                # parse failure rather than failing with a TypeError on the subscript below.
                raise ValidationError(msg='Parsing "{file}" failed'.format(file=filename))
            with open(filename, 'r', encoding=detected['encoding'], newline='') as csv_file:
                csv_reader = csv.reader(csv_file, delimiter=',', quotechar='"')
                return [results_tuple(*line) for line in csv_reader]
        except (OSError, csv.Error) as error:
            # Keep the underlying error attached as the cause of the validation failure
            raise ValidationError(msg='Parsing "{file}" failed'.format(file=filename)) from error

예제 #4

0

파일 보기

파일: test_init.py 프로젝트: imkernel/openlp

    def test_get_file_name_encoding_oserror_test(self):
        """
        Test get_file_encoding when opening the file raises an OSError
        """
        # GIVEN: A mocked UniversalDetector, a mocked open which raises an OSError and a mocked log
        with patch('openlp.core.common.UniversalDetector'):
            with patch('builtins.open', side_effect=OSError):
                with patch('openlp.core.common.log') as mocked_log:

                    # WHEN: Calling get_file_encoding
                    detected = get_file_encoding('file name')

        # THEN: log.exception should be called and get_file_encoding should return None
        mocked_log.exception.assert_called_once_with('Error detecting file encoding')
        self.assertIsNone(detected)

예제 #5

0

파일 보기

파일: test_init.py 프로젝트: feitianyiren/openlp

    def test_get_file_name_encoding_oserror_test(self):
        """
        Test get_file_encoding when the file cannot be opened (open raises an OSError)
        """
        # GIVEN: A mocked UniversalDetector, a mocked log and a mocked open with OSError as side effect
        detector_patch = patch('openlp.core.common.UniversalDetector')
        open_patch = patch('builtins.open', side_effect=OSError)
        log_patch = patch('openlp.core.common.log')
        with detector_patch, open_patch, log_patch as mocked_log:

            # WHEN: Calling get_file_encoding
            encoding = get_file_encoding('file name')

            # THEN: log.exception should be called and get_file_encoding should return None
            mocked_log.exception.assert_called_once_with('Error detecting file encoding')
            self.assertIsNone(encoding)

예제 #6

0

파일 보기

    def parse_csv_file(filename, results_tuple):
        """
        Read a CSV file and convert each of its rows into a ``results_tuple``.

        :param filename: The name of the file to parse. Str
        :param results_tuple: The namedtuple to use to store the results. namedtuple
        :return: A list with one results_tuple per row of the file
        """
        try:
            detected_encoding = get_file_encoding(filename)['encoding']
            with open(filename, 'r', encoding=detected_encoding, newline='') as csv_file:
                rows = []
                for fields in csv.reader(csv_file, delimiter=',', quotechar='"'):
                    rows.append(results_tuple(*fields))
                return rows
        except (OSError, csv.Error):
            # Read errors and CSV errors are both reported as a validation failure
            failure_message = 'Parsing "{file}" failed'.format(file=filename)
            raise ValidationError(msg=failure_message)

예제 #7

0

파일 보기

파일: csvbible.py 프로젝트: simhnna/openlp

    def parse_csv_file(file_path, results_tuple):
        """
        Read a CSV file and build one ``results_tuple`` per row.

        :param openlp.core.common.path.Path file_path: The path of the file to parse.
        :param namedtuple results_tuple: The namedtuple to use to store the results.
        :return: A list of namedtuples of type results_tuple
        :rtype: list[namedtuple]
        """
        try:
            detected = get_file_encoding(file_path)
            with file_path.open('r', encoding=detected['encoding'], newline='') as csv_file:
                reader = csv.reader(csv_file, delimiter=',', quotechar='"')
                parsed_rows = [results_tuple(*fields) for fields in reader]
        except (OSError, csv.Error):
            # Read errors and CSV errors are both reported as a validation failure
            raise ValidationError(msg='Parsing "{file}" failed'.format(file=file_path))
        return parsed_rows

예제 #8

0

파일 보기

    def test_get_file_encoding_done(self):
        """
        Test get_file_encoding when the detector reports done after the first chunk
        """
        # GIVEN: A mocked UniversalDetector instance whose done property is False on the first read and True on
        #       the second, and a mocked Path.open returning 1040 bytes of test data
        encoding_result = {'encoding': 'UTF-8', 'confidence': 0.99}
        detector_inst = MagicMock()
        detector_inst.close.return_value = encoding_result
        type(detector_inst).done = PropertyMock(side_effect=[False, True])
        with patch('openlp.core.common.UniversalDetector') as mocked_universal_detector, \
                patch.object(Path, 'open', return_value=BytesIO(b'data' * 260)) as mocked_open:
            mocked_universal_detector.return_value = detector_inst

            # WHEN: Calling get_file_encoding
            result = get_file_encoding(Path('file name'))

        # THEN: The feed method of UniversalDetector should only be called once before returning a result
        mocked_open.assert_called_once_with('rb')
        assert detector_inst.feed.mock_calls == [call(b'data' * 256)]
        detector_inst.close.assert_called_once_with()
        assert result == 'UTF-8'

예제 #9

0

파일 보기

파일: test_init.py 프로젝트: imkernel/openlp

    def test_get_file_name_encoding_done_test(self):
        """
        Test get_file_encoding when the detector reports done after the first chunk
        """
        # GIVEN: A mocked UniversalDetector instance whose done property is False on the first read and True on
        #       the second, and a mocked open returning 1040 bytes of test data
        encoding_result = {'encoding': 'UTF-8', 'confidence': 0.99}
        detector_inst = MagicMock()
        detector_inst.result = encoding_result
        type(detector_inst).done = PropertyMock(side_effect=[False, True])
        with patch('openlp.core.common.UniversalDetector') as mocked_universal_detector, \
                patch('builtins.open', return_value=BytesIO(b"data" * 260)) as mocked_open:
            mocked_universal_detector.return_value = detector_inst

            # WHEN: Calling get_file_encoding
            result = get_file_encoding('file name')

        # THEN: The feed method of UniversalDetector should only be called once before returning a result
        mocked_open.assert_called_once_with('file name', 'rb')
        self.assertEqual(detector_inst.feed.mock_calls, [call(b"data" * 256)])
        detector_inst.close.assert_called_once_with()
        self.assertEqual(result, encoding_result)

예제 #10

0

파일 보기

    def test_get_file_encoding_eof(self):
        """
        Test get_file_encoding when the end of the file is reached before the detector is done
        """
        # GIVEN: A mocked UniversalDetector instance which never reports done and a mocked Path.open with 1040
        #       bytes of test data (a full 1024 byte chunk followed by a 16 byte remainder)
        encoding_result = {'encoding': 'UTF-8', 'confidence': 0.99}
        with patch('openlp.core.common.UniversalDetector') as mocked_universal_detector, \
                patch.object(Path, 'open', return_value=BytesIO(b'data' * 260)) as mocked_open:
            detector_inst = MagicMock(mock=mocked_universal_detector, done=False)
            detector_inst.close.return_value = encoding_result
            mocked_universal_detector.return_value = detector_inst

            # WHEN: Calling get_file_encoding
            result = get_file_encoding(Path('file name'))

        # THEN: The feed method of UniversalDetector should have been called twice before returning a result
        mocked_open.assert_called_once_with('rb')
        expected_feeds = [call(b'data' * 256), call(b'data' * 4)]
        assert detector_inst.feed.mock_calls == expected_feeds
        detector_inst.close.assert_called_once_with()
        assert result == 'UTF-8'

예제 #11

0

파일 보기

파일: test_init.py 프로젝트: imkernel/openlp

    def test_get_file_name_encoding_eof_test(self):
        """
        Test get_file_encoding when the end of the file is reached before the detector is done
        """
        # GIVEN: A mocked UniversalDetector instance which never reports done and a mocked open with 1040 bytes
        #       of test data (a full 1024 byte chunk followed by a 16 byte remainder)
        encoding_result = {'encoding': 'UTF-8', 'confidence': 0.99}
        with patch('openlp.core.common.UniversalDetector') as mocked_universal_detector, \
                patch('builtins.open', return_value=BytesIO(b"data" * 260)) as mocked_open:
            detector_inst = MagicMock(mock=mocked_universal_detector, done=False)
            detector_inst.result = encoding_result
            mocked_universal_detector.return_value = detector_inst

            # WHEN: Calling get_file_encoding
            result = get_file_encoding('file name')

        # THEN: The feed method of UniversalDetector should have been called twice before returning a result
        mocked_open.assert_called_once_with('file name', 'rb')
        expected_feeds = [call(b"data" * 256), call(b"data" * 4)]
        self.assertEqual(detector_inst.feed.mock_calls, expected_feeds)
        detector_inst.close.assert_called_once_with()
        self.assertEqual(result, encoding_result)

예제 #12

0

파일 보기

    def test_get_file_encoding_oserror(self):
        """
        Test get_file_encoding when opening the file raises an OSError
        """
        # GIVEN: A mocked UniversalDetector instance which isn't set to done, a mocked open which raises an
        #       OSError and a mocked log
        encoding_result = {'encoding': 'UTF-8', 'confidence': 0.99}
        with patch('openlp.core.common.UniversalDetector') as mocked_universal_detector, \
                patch('builtins.open', side_effect=OSError), \
                patch('openlp.core.common.log') as mocked_log:
            detector_inst = MagicMock(mock=mocked_universal_detector, done=False)
            detector_inst.close.return_value = encoding_result
            mocked_universal_detector.return_value = detector_inst

            # WHEN: Calling get_file_encoding
            result = get_file_encoding(Path('file name'))

        # THEN: log.exception should be called, nothing should have been fed to the detector, the detector
        #       should still be closed and its 'UTF-8' encoding returned
        mocked_log.exception.assert_called_once_with('Error detecting file encoding')
        detector_inst.feed.assert_not_called()
        detector_inst.close.assert_called_once_with()
        assert result == 'UTF-8'

예제 #13

0

파일 보기

파일: songbeamer.py 프로젝트: simhnna/openlp

 def do_import(self):
     """
     Receive a single file or a list of files to import.

     Every entry of ``self.import_source`` that is a file is read with its detected encoding and scanned line
     by line. ``#`` lines before the first separator go to ``parse_tags``, ``---`` starts a new verse and
     ``--`` becomes an optional page break. A running line number is kept for ``insert_chords``. If the verse
     tags turn out to be manually written headings, the whole file is scanned a second time with the tag
     lines counted as lines. Returns early when ``self.import_source`` is not a list or when
     ``self.stop_import_flag`` is set.
     """
     if not isinstance(self.import_source, list):
         return
     self.import_wizard.progress_bar.setMaximum(len(self.import_source))
     for file_path in self.import_source:
         # TODO: check that it is a valid SongBeamer file
         if self.stop_import_flag:
             return
         # Reset the per-song state before looking at the next file
         self.set_defaults()
         self.current_verse = ''
         self.current_verse_type = VerseType.tags[VerseType.Verse]
         self.chord_table = None
         if file_path.is_file():
             # Detect the encoding
             self.input_file_encoding = get_file_encoding(
                 file_path)['encoding']
             # The encoding should only be ANSI (cp1252), UTF-8, Unicode, Big-Endian-Unicode.
             # So if it doesn't start with 'u' we default to cp1252. See:
             # https://forum.songbeamer.com/viewtopic.php?p=419&sid=ca4814924e37c11e4438b7272a98b6f2
             if not self.input_file_encoding.lower().startswith('u'):
                 self.input_file_encoding = 'cp1252'
             with file_path.open(
                     encoding=self.input_file_encoding) as song_file:
                 song_data = song_file.readlines()
         else:
             # Not a file: skip this entry
             continue
         self.title = file_path.stem
         read_verses = False
         # The first verse separator doesn't count, but the others do, so the line count starts at -1
         line_number = -1
         verse_tags_mode = VerseTagMode.Unknown
         first_verse = True
         # A manual index is used (instead of a for loop) so the scan can be restarted by resetting idx to -1
         idx = -1
         while idx + 1 < len(song_data):
             idx = idx + 1
             # 'line' keeps its leading whitespace, 'stripped_line' is only used to recognise separators
             line = song_data[idx].rstrip()
             stripped_line = line.strip()
             if line.startswith('#') and not read_verses:
                 self.parse_tags(line)
             elif stripped_line.startswith('---'):
                 # '---' is a verse breaker
                 if self.current_verse:
                     # Store the verse collected so far and start a fresh one
                     self.replace_html_tags()
                     self.add_verse(self.current_verse,
                                    self.current_verse_type)
                     self.current_verse = ''
                     self.current_verse_type = VerseType.tags[
                         VerseType.Verse]
                     first_verse = False
                 read_verses = True
                 verse_start = True
                 # Songbeamer allows chord on line "-1", meaning the first line has only chords
                 if line_number == -1:
                     first_line = self.insert_chords(line_number, '')
                     if first_line:
                         self.current_verse = first_line.strip() + '\n'
                 line_number += 1
             elif stripped_line.startswith('--'):
                 # '--' is a page breaker, we convert to optional page break
                 self.current_verse += '[---]\n'
                 line_number += 1
             elif read_verses:
                 if verse_start:
                     # First line after a verse breaker: it may be a verse mark instead of lyrics
                     verse_start = False
                     verse_mark = self.check_verse_marks(line)
                     # To ensure that linenumbers are mapped correctly when inserting chords, we attempt to detect
                     # if verse tags are inserted manually or by SongBeamer. If they are inserted manually the lines
                     # should be counted, otherwise not. If all verses start with a tag we assume it is inserted by
                     # SongBeamer.
                     if first_verse and verse_tags_mode == VerseTagMode.Unknown:
                         if verse_mark:
                             verse_tags_mode = VerseTagMode.ContainsTags
                         else:
                             verse_tags_mode = VerseTagMode.ContainsNoTags
                     elif verse_tags_mode != VerseTagMode.ContainsNoTagsRestart:
                         if not verse_mark and verse_tags_mode == VerseTagMode.ContainsTags:
                             # A verse mark was expected but not found, which means that verse marks have not been
                             # inserted by songbeamer, but are manually added headings. So restart the loop, and
                             # count tags as lines.
                             self.set_defaults()
                             self.title = file_path.stem
                             verse_tags_mode = VerseTagMode.ContainsNoTagsRestart
                             read_verses = False
                             # The first verse separator doesn't count, but the others do, so the line count
                             # starts at -1
                             line_number = -1
                             first_verse = True
                             idx = -1
                             # NOTE(review): self.current_verse and self.current_verse_type are not reset here
                             # unless set_defaults() does so - confirm
                             continue
                     if not verse_mark:
                         # An ordinary lyrics line: add chords and append it to the current verse
                         line = self.insert_chords(line_number, line)
                         self.current_verse += line.strip() + '\n'
                         line_number += 1
                     elif verse_tags_mode in [
                             VerseTagMode.ContainsNoTags,
                             VerseTagMode.ContainsNoTagsRestart
                     ]:
                         # The verse mark is a manually written heading, so it counts as a line
                         line_number += 1
                 else:
                     line = self.insert_chords(line_number, line)
                     self.current_verse += line.strip() + '\n'
                     line_number += 1
         # Store the verse that was still being collected when the file ended
         if self.current_verse:
             self.replace_html_tags()
             self.add_verse(self.current_verse, self.current_verse_type)
         if not self.finish():
             self.log_error(file_path)

예제 #14

0

파일 보기

파일: worshipassistant.py 프로젝트: simhnna/openlp

 def do_import(self):
     """
     Receive a CSV file to import.

     ``self.import_source`` is opened with its detected encoding and read completely through a
     ``csv.DictReader``. Every record becomes one song: title, author, copyright, CCLI number and verse order
     are taken from the ``TITLE``, ``AUTHOR``, ``COPYRIGHT``, ``CCLINR`` and ``ROADMAP`` columns and the
     lyrics from ``LYRICS2``, where lines starting with ``[`` are verse markers. Reading and decoding
     problems are reported through ``log_error``. The import stops when ``self.stop_import_flag`` is set.
     """
     # Get encoding
     encoding = get_file_encoding(self.import_source)['encoding']
     with self.import_source.open('r', encoding=encoding) as songs_file:
         songs_reader = csv.DictReader(songs_file, escapechar='\\')
         try:
             # Read every record up front so that the number of songs is known for the progress bar
             records = list(songs_reader)
         except csv.Error as e:
             self.log_error(
                 translate('SongsPlugin.WorshipAssistantImport',
                           'Error reading CSV file.'),
                 translate('SongsPlugin.WorshipAssistantImport',
                           'Line {number:d}: {error}').format(
                               number=songs_reader.line_num, error=e))
             return
     num_records = len(records)
     log.info('{count} records found in CSV file'.format(count=num_records))
     self.import_wizard.progress_bar.setMaximum(num_records)
     # Create regex to strip html tags
     re_html_strip = re.compile(r'<[^>]+>')
     for index, record in enumerate(records, 1):
         if self.stop_import_flag:
             return
         # Ensure that all keys are uppercase
         record = dict(
             (field.upper(), value) for field, value in record.items())
         # The CSV file has a line in the middle of the file where the headers are repeated.
         #  We need to skip this line.
         # NOTE(review): a record without one of the columns used below raises a KeyError, which is not
         # handled in this method - confirm that the columns are always present
         if record['TITLE'] == "TITLE" and record[
                 'AUTHOR'] == 'AUTHOR' and record['LYRICS2'] == 'LYRICS2':
             continue
         self.set_defaults()
         verse_order_list = []
         try:
             self.title = record['TITLE']
             if record['AUTHOR'] != EMPTY_STR:
                 self.parse_author(record['AUTHOR'])
             if record['COPYRIGHT'] != EMPTY_STR:
                 self.add_copyright(record['COPYRIGHT'])
             if record['CCLINR'] != EMPTY_STR:
                 self.ccli_number = record['CCLINR']
             if record['ROADMAP'] != EMPTY_STR:
                 # The roadmap is a comma separated list of verse names
                 verse_order_list = [
                     x.strip() for x in record['ROADMAP'].split(',')
                 ]
             lyrics = record['LYRICS2']
         except UnicodeDecodeError as e:
             # Only this record is skipped, the import continues with the next one
             self.log_error(
                 translate('SongsPlugin.WorshipAssistantImport',
                           'Record {count:d}').format(count=index),
                 translate('SongsPlugin.WorshipAssistantImport',
                           'Decoding error: {error}').format(error=e))
             continue
         except TypeError as e:
             # The file is treated as invalid and the whole import is aborted
             self.log_error(
                 translate('SongsPlugin.WorshipAssistantImport',
                           'File not valid WorshipAssistant CSV format.'),
                 'TypeError: {error}'.format(error=e))
             return
         verse = ''
         used_verses = []
         # Text in front of the first verse marker is stored under this default verse id
         verse_id = VerseType.tags[VerseType.Verse] + '1'
         for line in lyrics.splitlines():
             if line.startswith('['):  # verse marker
                 # Add previous verse
                 if verse:
                     # remove trailing linebreak, part of the WA syntax
                     self.add_verse(verse[:-1], verse_id)
                     used_verses.append(verse_id)
                     verse = ''
                 # drop the square brackets
                 right_bracket = line.find(']')
                 content = line[1:right_bracket].lower()
                 # Split the marker into a non-digit prefix and a number, e.g. 'verse 2' -> 'verse ', '2'
                 match = re.match(r'(\D*)(\d+)', content)
                 if match is not None:
                     verse_tag = match.group(1)
                     verse_num = match.group(2)
                 else:
                     # otherwise we assume number 1 and take the whole prefix as the verse tag
                     verse_tag = content
                     verse_num = '1'
                 verse_index = VerseType.from_loose_input(
                     verse_tag) if verse_tag else 0
                 verse_tag = VerseType.tags[verse_index]
                 # Update verse order when the verse name has changed
                 verse_id = verse_tag + verse_num
                 # Make sure we've not chosen an id already used
                 while verse_id in verse_order_list and content in verse_order_list:
                     verse_num = str(int(verse_num) + 1)
                     verse_id = verse_tag + verse_num
                 if content != verse_id:
                     # Replace the name from the file by the generated id in the verse order (case-insensitive)
                     for i in range(len(verse_order_list)):
                         if verse_order_list[i].lower() == content.lower():
                             verse_order_list[i] = verse_id
             else:
                 # add line text to verse. Strip out html
                 verse += re_html_strip.sub('', line) + '\n'
         if verse:
             # remove trailing linebreak, part of the WA syntax
             if verse.endswith('\n\n'):
                 verse = verse[:-1]
             self.add_verse(verse, verse_id)
             used_verses.append(verse_id)
         if verse_order_list:
             # Use the verse order in the import, but remove entries that don't have a text
             cleaned_verse_order_list = []
             for verse in verse_order_list:
                 if verse in used_verses:
                     cleaned_verse_order_list.append(verse)
             self.verse_order_list = cleaned_verse_order_list
         if not self.finish():
             self.log_error(
                 translate('SongsPlugin.WorshipAssistantImport',
                           'Record {count:d}').format(count=index) +
                 (': "' + self.title + '"' if self.title else ''))