Beispiel #1
0
 def test_unicode(self):
     """Check the unicode conversion of `ErrorString` for three exceptions."""
     # Exception built from a plain unicode literal.
     expected = u'Exception: spam'
     actual = unicode(ErrorString(Exception(u'spam')))
     self.assertEqual(expected, actual)

     # Exception carrying the fixture's unicode sample string.
     expected = u'IndexError: '+self.us
     actual = unicode(ErrorString(IndexError(self.us)))
     self.assertEqual(expected, actual)

     # Exception carrying the fixture's byte-string sample; the expected
     # text is built by routing the sample through SafeString.
     expected = u'ImportError: %s' % SafeString(self.bs)
     actual = unicode(ErrorString(ImportError(self.bs)))
     self.assertEqual(expected, actual)
Beispiel #2
0
 def test_str(self):
     """Check the `str()` conversion of `ErrorString` for three exceptions."""
     # Exception built from a plain string literal.
     expected = 'Exception: spam'
     actual = str(ErrorString(Exception('spam')))
     self.assertEqual(expected, actual)

     # Exception carrying the fixture's byte-string sample.
     expected = 'IndexError: '+str(self.bs)
     actual = str(ErrorString(IndexError(self.bs)))
     self.assertEqual(expected, actual)

     # Exception carrying the fixture's unicode sample; the expected text
     # is built by routing the sample through SafeString.
     expected = 'ImportError: %s' % SafeString(self.us)
     actual = str(ErrorString(ImportError(self.us)))
     self.assertEqual(expected, actual)
Beispiel #3
0
 def process_txt(self, directory, name):
     """
     Publish the text file `name` found in `directory` as an HTML file.

     Files whose name starts with ``pep-`` are handled by the 'PEPs'
     publisher, all others by the '.txt' publisher.  Returns 1 when
     `directory` is listed in the ``prune`` setting; otherwise the
     function falls off the end and returns None.
     """
     if name.startswith('pep-'):
         publisher = 'PEPs'
     else:
         publisher = '.txt'
     settings = self.get_settings(publisher, directory)
     errout = ErrorOutput(encoding=settings.error_encoding)
     pub_struct = self.publishers[publisher]
     # Pruned directories are skipped before any path is computed.
     if settings.prune and (directory in settings.prune):
         return 1
     settings._source = os.path.normpath(os.path.join(directory, name))
     # Replace the last four characters of the source path with '.html'.
     settings._destination = settings._source[:-4] + '.html'
     if not self.initial_settings.silent:
         print >> errout, '    ::: Processing: %s' % name
         sys.stderr.flush()
     try:
         # With the dry_run setting nothing is published.
         if not settings.dry_run:
             core.publish_file(source_path=settings._source,
                               destination_path=settings._destination,
                               reader_name=pub_struct.reader_name,
                               parser_name='restructuredtext',
                               writer_name=pub_struct.writer_name,
                               settings=settings)
     except ApplicationError, error:
         # Report the failure on the error stream instead of propagating it.
         print >> errout, '        %s' % ErrorString(error)
Beispiel #4
0
 def validate_settings(self, filename, option_parser):
     """
     Run the option validators over the values stored in this config.

     Every setting of every section is looked up in `option_parser` by its
     destination name; settings the parser does not know are skipped.  If
     the option has a validator, the stored value is replaced by the
     validator's result.  If the option names an overridden setting, that
     setting is reset to None in the same section.

     Raises ValueError (mentioning `filename`, the section and the
     offending setting) when a validator raises any exception.
     """
     for section in self.sections():
         for setting in self.options(section):
             try:
                 option = option_parser.get_option_by_dest(setting)
             except KeyError:
                 # Unknown to the option parser: leave the value untouched.
                 continue

             if option.validator:
                 raw_value = self.get(section, setting)
                 try:
                     validated = option.validator(
                         setting, raw_value, option_parser,
                         config_parser=self, config_section=section)
                 except Exception as error:
                     details = (filename, section, ErrorString(error),
                                setting, raw_value)
                     raise ValueError(
                         'Error in config file "%s", section "[%s]":\n'
                         '    %s\n'
                         '        %s = %s' % details)
                 else:
                     self.set(section, setting, validated)

             if option.overrides:
                 self.set(section, option.overrides, None)
Beispiel #5
0
    def write(self, data):
        """Encode `data`, write it to a single file, and return it.

        In Python 3, `data` is returned unchanged.

        Under Python 2 the data is encoded before writing.  Under Python 3
        the text is handed to the destination as is, unless the requested
        output encoding differs from the destination's own encoding; in
        that case the data is encoded here and the bytes are written to
        ``sys.stdout.buffer``.

        Raises UnicodeError if the data cannot be encoded or one of the
        encodings is unknown.  The destination is closed afterwards when
        ``self.autoclose`` is set.
        """
        if sys.version_info < (3, 0):
            data = self.encode(data)
        if not self.opened:
            self.open()
        try:
            if (sys.version_info >= (3, 0) and self.encoding
                    and hasattr(self.destination, 'encoding')
                    and self.encoding != self.destination.encoding
                    and codecs.lookup(self.encoding) != codecs.lookup(
                        self.destination.encoding)):
                # encode self, write bytes
                bdata = self.encode(data)
                if os.linesep != '\n':
                    # `bdata` is bytes here, so both arguments of replace()
                    # must be bytes as well (str arguments raise TypeError).
                    bdata = bdata.replace(b'\n', os.linesep.encode('ascii'))
                # NOTE(review): the bytes go to sys.stdout, not to
                # self.destination -- confirm this branch is only meant
                # for the stdout destination.
                sys.stdout.buffer.write(bdata)
            else:
                self.destination.write(data)
        except (UnicodeError, LookupError) as err:  # can only happen in py3k
            raise UnicodeError(
                'Unable to encode output data. output-encoding is: '
                '%s.\n(%s)' % (self.encoding, ErrorString(err)))
        finally:
            if self.autoclose:
                self.close()
        return data
Beispiel #6
0
 def run(self):
     """Include a file as part of the content of this reST file.

     Raises a "warning" system message when file insertion is disabled
     and a "severe" one when the include file cannot be opened.
     """
     if not self.state.document.settings.file_insertion_enabled:
         raise self.warning('"%s" directive disabled.' % self.name)
     # Relative include paths are resolved against the directory of the
     # source that contains the directive.
     source = self.state_machine.input_lines.source(
         self.lineno - self.state_machine.input_offset - 1)
     source_dir = os.path.dirname(os.path.abspath(source))
     path = directives.path(self.arguments[0])
     # "<name>" refers to a file in the standard include directory.
     if path.startswith('<') and path.endswith('>'):
         path = os.path.join(self.standard_include_path, path[1:-1])
     path = os.path.normpath(os.path.join(source_dir, path))
     path = utils.relative_path(None, path)
     path = nodes.reprunicode(path)
     # Directive options take precedence over the document settings.
     encoding = self.options.get(
         'encoding', self.state.document.settings.input_encoding)
     tab_width = self.options.get('tab-width',
                                  self.state.document.settings.tab_width)
     try:
         self.state.document.settings.record_dependencies.add(path)
         include_file = io.FileInput(
             source_path=path, encoding=encoding,
             error_handler=(self.state.document.settings.\
                            input_encoding_error_handler),
             handle_io_errors=None)
     except IOError as error:
         raise self.severe(u'Problems with "%s" directive path:\n%s.' %
                           (self.name, ErrorString(error)))
Beispiel #7
0
    def write(self, data):
        """Encode `data`, write it to a single file, and return it.

        With Python 3 or binary output mode, `data` is returned unchanged,
        except when specified encoding and output encoding differ.

        Raises UnicodeError if the data cannot be encoded or the encoding
        is unknown.  The destination is closed afterwards when
        ``self.autoclose`` is set.
        """
        if not self.opened:
            self.open()
        try:
            if 'b' not in self.mode and (
                    sys.version_info < (3, 0) or check_encoding(
                        self.destination, self.encoding) is False):
                data = self.encode(data)
                if sys.version_info >= (3, 0) and os.linesep != '\n':
                    # writing as binary data -> fix endings
                    if isinstance(data, bytes):
                        # bytes.replace() only accepts bytes arguments;
                        # passing str here raises TypeError.
                        data = data.replace(b'\n',
                                            os.linesep.encode('ascii'))
                    else:
                        data = data.replace('\n', os.linesep)

            self.destination.write(data)

        except (UnicodeError, LookupError) as err:
            raise UnicodeError(
                'Unable to encode output data. output-encoding is: '
                '%s.\n(%s)' % (self.encoding, ErrorString(err)))
        finally:
            if self.autoclose:
                self.close()
        return data
 def report_UnicodeError(self, error):
     """
     Write advice about an output-encoding failure to the error stream.

     The message quotes the part of the data that `error` points at, once
     with XML character references and once with backslash escapes, and
     lists the error handlers the user may try.
     """
     # The slice of the input the codec reported as problematic.
     data = error.object[error.start:error.end]
     template = (
         '%s\n'
         '\n'
         'The specified output encoding (%s) cannot\n'
         'handle all of the output.\n'
         'Try setting "--output-encoding-error-handler" to\n'
         '\n'
         '* "xmlcharrefreplace" (for HTML & XML output);\n'
         '  the output will contain "%s" and should be usable.\n'
         '* "backslashreplace" (for other output formats);\n'
         '  look for "%s" in the output.\n'
         '* "replace"; look for "?" in the output.\n'
         '\n'
         '"--output-encoding-error-handler" is currently set to "%s".\n'
         '\n'
         'Exiting due to error.  Use "--traceback" to diagnose.\n'
         'If the advice above doesn\'t eliminate the error,\n'
         'please report it to <*****@*****.**>.\n'
         'Include "--traceback" output, Docutils version (%s),\n'
         'Python version (%s), your OS type & version, and the\n'
         'command line used.\n')
     settings = self.settings
     values = (
         ErrorString(error),
         settings.output_encoding,
         data.encode('ascii', 'xmlcharrefreplace'),
         data.encode('ascii', 'backslashreplace'),
         settings.output_encoding_error_handler,
         __version__,
         sys.version.split()[0],
     )
     self._stderr.write(template % values)
Beispiel #9
0
 def open(self):
     """
     Open ``self.destination_path`` for binary writing.

     On IOError the exception is re-raised unless ``self.handle_io_errors``
     is true; in that case the error is printed to ``self._stderr`` and the
     process exits with status 1.
     """
     try:
         self.destination = open(self.destination_path, 'wb')
     except IOError, error:
         if not self.handle_io_errors:
             raise
         print >> self._stderr, ErrorString(error)
         print >> self._stderr, (u'Unable to open destination file'
                                 u" for writing ('%s').  Exiting." %
                                 self.destination_path)
         sys.exit(1)
Beispiel #10
0
 def run(self):
     """
     Collect the raw text given as content or read from the "file" option.

     Raises a "warning" system message when the directive is disabled by
     the settings, an "error" for conflicting content/options, and a
     "severe" one when the file cannot be opened or decoded.
     """
     if (not self.state.document.settings.raw_enabled
             or (not self.state.document.settings.file_insertion_enabled and
                 ('file' in self.options or 'url' in self.options))):
         raise self.warning('"%s" directive disabled.' % self.name)
     # Normalize the format argument: lower case, single spaces.
     attributes = {'format': ' '.join(self.arguments[0].lower().split())}
     encoding = self.options.get(
         'encoding', self.state.document.settings.input_encoding)
     if self.content:
         if 'file' in self.options or 'url' in self.options:
             raise self.error(
                 '"%s" directive may not both specify an external file '
                 'and have content.' % self.name)
         text = '\n'.join(self.content)
     elif 'file' in self.options:
         if 'url' in self.options:
             raise self.error(
                 'The "file" and "url" options may not be simultaneously '
                 'specified for the "%s" directive.' % self.name)
         # The file path is resolved against the current source's directory.
         source_dir = os.path.dirname(
             os.path.abspath(self.state.document.current_source))
         path = os.path.normpath(
             os.path.join(source_dir, self.options['file']))
         path = utils.relative_path(None, path)
         try:
             raw_file = io.FileInput(
                 source_path=path, encoding=encoding,
                 error_handler=(self.state.document.settings.\
                                input_encoding_error_handler),
                 handle_io_errors=None)
             # TODO: currently, raw input files are recorded as
             # dependencies even if not used for the chosen output format.
             self.state.document.settings.record_dependencies.add(path)
         except IOError as error:
             raise self.severe(u'Problems with "%s" directive path:\n%s.' %
                               (self.name, ErrorString(error)))
         try:
             text = raw_file.read()
         except UnicodeError as error:
             raise self.severe(u'Problem with "%s" directive:\n%s' %
                               (self.name, ErrorString(error)))
Beispiel #11
0
    def report_Exception(self, error):
        """
        Write a report about `error` to ``self._stderr``.

        System messages and Unicode encode errors are delegated to their
        dedicated report methods, input/output errors get a short
        "unable to open" message, and anything else is printed together
        with a request to file a bug report.
        """
        if isinstance(error, utils.SystemMessage):
            self.report_SystemMessage(error)
        elif isinstance(error, UnicodeEncodeError):
            self.report_UnicodeError(error)
        elif isinstance(error, io.InputError):
            self._stderr.write(u'Unable to open source file for reading:\n'
                               u'  %s\n' % ErrorString(error))
        elif isinstance(error, io.OutputError):
            self._stderr.write(
                u'Unable to open destination file for writing:\n'
                u'  %s\n' % ErrorString(error))
        else:
            # Fallback: print the error followed by reporting instructions.
            print >>self._stderr, u'%s' % ErrorString(error)
            print >>self._stderr, ("""\
Exiting due to error.  Use "--traceback" to diagnose.
Please report errors to <*****@*****.**>.
Include "--traceback" output, Docutils version (%s [%s]),
Python version (%s), your OS type & version, and the
command line used.""" % (__version__, __version_details__,
                         sys.version.split()[0]))
Beispiel #12
0
    def __init__(self,
                 source=None,
                 source_path=None,
                 encoding=None,
                 error_handler='strict',
                 autoclose=True,
                 handle_io_errors=True,
                 mode='rU'):
        """
        Set up the input; open `source_path` when no `source` is given.

        :Parameters:
            - `source`: either a file-like object (which is read directly), or
              `None` (which implies `sys.stdin` if no `source_path` given).
            - `source_path`: a path to a file, which is opened and then read.
            - `encoding`: the expected text encoding of the input file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after read (except when
              `sys.stdin` is the source).
            - `handle_io_errors`: summarize I/O errors here, and exit?
            - `mode`: how the file is to be opened (see standard function
              `open`). The default 'rU' provides universal newline support
              for text files.

        If the file cannot be opened, the IOError is re-raised when
        `handle_io_errors` is false; otherwise the error is printed to the
        error stream and the process exits with status 1.
        """
        Input.__init__(self, source, source_path, encoding, error_handler)
        self.autoclose = autoclose
        self.handle_io_errors = handle_io_errors
        self._stderr = ErrorOutput()

        if source is None:
            if source_path:
                # Specify encoding in Python 3
                if sys.version_info >= (3, 0):
                    kwargs = {
                        'encoding': self.encoding,
                        'errors': self.error_handler
                    }
                else:
                    kwargs = {}

                # NOTE(review): the 'U' flag in the default mode is rejected
                # by open() on Python >= 3.11 -- confirm the supported
                # interpreter versions.
                try:
                    self.source = open(source_path, mode, **kwargs)
                except IOError, error:
                    if not handle_io_errors:
                        raise
                    print >> self._stderr, ErrorString(error)
                    print >> self._stderr, (
                        u'Unable to open source'
                        u" file for reading ('%s'). Exiting." % source_path)
                    sys.exit(1)
            else:
                # Neither a source object nor a path: read standard input.
                self.source = sys.stdin
Beispiel #13
0
    def decode(self, data):
        """
        Turn `data` into text, guessing the encoding when none is given.

        Text input is returned unchanged.  Byte input is decoded with the
        explicitly configured encoding if there is one, else with the
        encoding declared by the data itself, else by trying UTF-8, the
        locale encoding (if known) and Latin-1 in that order.  Byte order
        marks are removed from the result and the encoding that worked is
        stored in ``self.successful_encoding``.

        Raises UnicodeError if none of the candidate encodings works.

        The client application should call ``locale.setlocale`` at the
        beginning of processing::

            locale.setlocale(locale.LC_ALL, '')
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            assert isinstance(data, str), ('input encoding is "unicode" '
                                           'but input is not a unicode object')
        if isinstance(data, str):
            # Already text: accepted whatever self.encoding says.
            return data

        if self.encoding:
            # An explicitly given encoding is trusted.
            candidates = [self.encoding]
        else:
            declared = self.determine_encoding_from_data(data)
            if declared:
                # The data names its own encoding (explicitly or via BOM).
                candidates = [declared]
            elif locale_encoding:
                # Heuristics: UTF-8 first, because it only matches data
                # that really is UTF-8; Latin-1 last.
                candidates = ['utf-8', locale_encoding, 'latin-1']
            else:
                candidates = ['utf-8', 'latin-1']

        for candidate in candidates:
            try:
                text = str(data, candidate, self.error_handler)
            except (UnicodeError, LookupError) as err:
                # Keep a reference: the name bound by "as" is deleted when
                # the except clause ends.
                error = err
                continue
            self.successful_encoding = candidate
            # Strip byte order marks from the decoded text.
            return text.replace('\ufeff', '')

        tried = ', '.join([repr(candidate) for candidate in candidates])
        raise UnicodeError(
            'Unable to decode input data.  Tried the following encodings: '
            '%s.\n(%s)' %
            (tried, ErrorString(error)))
Beispiel #14
0
 def open(self):
     """
     Open ``self.destination_path`` using ``self.mode``.

     On IOError: when ``self.handle_io_errors`` is true, the error is
     printed to ``self._stderr`` and the process exits with status 1;
     otherwise an `OutputError` carrying errno, message and path is raised.
     """
     # Specify encoding in Python 3.
     if sys.version_info >= (3, 0):
         kwargs = {'encoding': self.encoding, 'errors': self.error_handler}
     else:
         kwargs = {}
     try:
         self.destination = open(self.destination_path, self.mode, **kwargs)
     except IOError, error:
         if self.handle_io_errors:
             print >> self._stderr, ErrorString(error)
             print >> self._stderr, (u'Unable to open destination file'
                                     u" for writing ('%s').  Exiting." %
                                     self.destination_path)
             sys.exit(1)
         # Only reached when I/O errors are not handled here.
         raise OutputError(error.errno, error.strerror,
                           self.destination_path)
Beispiel #15
0
 def open(self):
     """
     Open ``self.destination_path`` for writing in text mode.

     On IOError the exception is re-raised unless ``self.handle_io_errors``
     is true; in that case the error is printed to ``self._stderr`` and the
     process exits with status 1.
     """
     # Specify encoding in Python 3.
     # (Do not use binary mode ('wb') as this prevents the
     # conversion of newlines to the system specific default.)
     if sys.version_info >= (3, 0):
         kwargs = {'encoding': self.encoding, 'errors': self.error_handler}
     else:
         kwargs = {}
     try:
         self.destination = open(self.destination_path, 'w', **kwargs)
     except IOError, error:
         if not self.handle_io_errors:
             raise
         print >> self._stderr, ErrorString(error)
         print >> self._stderr, (u'Unable to open destination file'
                                 u" for writing ('%s').  Exiting." %
                                 self.destination_path)
         sys.exit(1)
Beispiel #16
0
 def process(self, opt, value, values, parser):
     """
     Call the validator function on applicable settings and
     evaluate the 'overrides' option.
     Extends `optparse.Option.process`.

     Returns whatever `optparse.Option.process` returned.  Raises
     `optparse.OptionValueError` when the validator raises any exception.
     """
     result = optparse.Option.process(self, opt, value, values, parser)
     setting = self.dest
     if setting:
         if self.validator:
             # Validate the value the base class just stored.
             value = getattr(values, setting)
             try:
                 new_value = self.validator(setting, value, parser)
             except Exception as error:
                 # Raise the exception object itself: raising a tuple is a
                 # TypeError in Python 3, and Python 2 only ever raised the
                 # tuple's first element.
                 raise optparse.OptionValueError(
                     'Error in option "%s":\n    %s' %
                     (opt, ErrorString(error)))
             setattr(values, setting, new_value)
         if self.overrides:
             # Reset the setting this option overrides.
             setattr(values, self.overrides, None)
     return result
Beispiel #17
0
 def run(self):
     """
     Convert the character codes in the directive argument to text nodes.

     Raises an "error" system message when the directive is used outside
     a substitution definition or when a character code is invalid.
     """
     if not isinstance(self.state, states.SubstitutionDef):
         raise self.error(
             'Invalid context: the "%s" directive can only be used within '
             'a substitution definition.' % self.name)
     substitution_definition = self.state_machine.node
     # "trim" sets both the left and the right trim attribute.
     if 'trim' in self.options:
         substitution_definition.attributes['ltrim'] = 1
         substitution_definition.attributes['rtrim'] = 1
     if 'ltrim' in self.options:
         substitution_definition.attributes['ltrim'] = 1
     if 'rtrim' in self.options:
         substitution_definition.attributes['rtrim'] = 1
     # Only the text before the first comment-pattern match holds codes.
     codes = self.comment_pattern.split(self.arguments[0])[0].split()
     element = nodes.Element()
     for code in codes:
         try:
             decoded = directives.unicode_code(code)
         except ValueError as error:
             raise self.error(u'Invalid character code: %s\n%s' %
                              (code, ErrorString(error)))
         element += nodes.Text(decoded)
Beispiel #18
0
class Include(Directive):
    """
    Include content read from a separate source file.

    Content may be parsed by the parser, or included as a literal
    block.  The encoding of the included file can be specified.  Only
    a part of the given file argument may be included by specifying
    start and end line or text to match before and/or after the text
    to be used.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {
        'literal': directives.flag,
        'code': directives.unchanged,
        'encoding': directives.encoding,
        'tab-width': int,
        'start-line': int,
        'end-line': int,
        'start-after': directives.unchanged_required,
        'end-before': directives.unchanged_required,
        # ignored except for 'literal' or 'code':
        'number-lines': directives.unchanged,  # integer or None
        'class': directives.class_option,
        'name': directives.unchanged
    }

    # Directory used for include arguments written as "<name>".
    standard_include_path = os.path.join(os.path.dirname(states.__file__),
                                         'include')

    def run(self):
        """Include a file as part of the content of this reST file.

        Raises a "warning" system message when file insertion is disabled
        and a "severe" one when the file cannot be opened or decoded.
        """
        if not self.state.document.settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)
        # Relative include paths are resolved against the directory of
        # the source that contains the directive.
        source = self.state_machine.input_lines.source(
            self.lineno - self.state_machine.input_offset - 1)
        source_dir = os.path.dirname(os.path.abspath(source))
        path = directives.path(self.arguments[0])
        if path.startswith('<') and path.endswith('>'):
            path = os.path.join(self.standard_include_path, path[1:-1])
        path = os.path.normpath(os.path.join(source_dir, path))
        path = utils.relative_path(None, path)
        path = nodes.reprunicode(path)
        # Directive options take precedence over the document settings.
        encoding = self.options.get(
            'encoding', self.state.document.settings.input_encoding)
        tab_width = self.options.get('tab-width',
                                     self.state.document.settings.tab_width)
        try:
            self.state.document.settings.record_dependencies.add(path)
            include_file = io.FileInput(
                source_path=path, encoding=encoding,
                error_handler=(self.state.document.settings.\
                               input_encoding_error_handler),
                handle_io_errors=None)
        except IOError as error:
            raise self.severe(u'Problems with "%s" directive path:\n%s.' %
                              (self.name, ErrorString(error)))
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        try:
            # Read line by line only when a line range was requested.
            if startline or (endline is not None):
                lines = include_file.readlines()
                rawtext = ''.join(lines[startline:endline])
            else:
                rawtext = include_file.read()
        except UnicodeError as error:
            raise self.severe(u'Problem with "%s" directive:\n%s' %
                              (self.name, ErrorString(error)))
Beispiel #19
0
    def __init__(self,
                 source=None,
                 source_path=None,
                 encoding=None,
                 error_handler='strict',
                 autoclose=True,
                 handle_io_errors=True,
                 mode='rU'):
        """
        Set up the input; open `source_path` when no `source` is given.

        :Parameters:
            - `source`: either a file-like object (which is read directly), or
              `None` (which implies `sys.stdin` if no `source_path` given).
            - `source_path`: a path to a file, which is opened and then read.
            - `encoding`: the expected text encoding of the input file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after read (except when
              `sys.stdin` is the source).
            - `handle_io_errors`: summarize I/O errors here, and exit?
            - `mode`: how the file is to be opened (see standard function
              `open`). The default 'rU' provides universal newline support
              for text files.  Under Python 3 the 'U' flag is dropped
              before opening, because text mode already translates
              newlines and newer versions reject the flag.

        If the file cannot be opened and `handle_io_errors` is true, the
        error is printed and the process exits with status 1; otherwise
        `InputError` is raised.  Under Python 3, UnicodeError is raised
        when a given `source` is opened with an encoding that clashes with
        `encoding`.
        """
        Input.__init__(self, source, source_path, encoding, error_handler)
        self.autoclose = autoclose
        self.handle_io_errors = handle_io_errors
        self._stderr = ErrorOutput()

        if source is None:
            if source_path:
                # Specify encoding in Python 3
                if sys.version_info >= (3, 0):
                    kwargs = {
                        'encoding': self.encoding,
                        'errors': self.error_handler
                    }
                    # 'U' is a no-op in Python 3 and was removed in 3.11,
                    # where open() raises ValueError for it.
                    mode = mode.replace('U', '')
                    if not any(flag in mode for flag in 'rwax'):
                        # 'U' alone implied reading.
                        mode = 'r' + mode
                else:
                    kwargs = {}

                try:
                    self.source = open(source_path, mode, **kwargs)
                except IOError as error:
                    if handle_io_errors:
                        print(ErrorString(error), file=self._stderr)
                        print(('Unable to open source file for reading ("%s"). '
                               'Exiting.' % source_path),
                              file=self._stderr)
                        sys.exit(1)
                    raise InputError(error.errno, error.strerror, source_path)
            else:
                # Neither a source object nor a path: read standard input.
                self.source = sys.stdin
        elif (sys.version_info >= (3, 0)
              and check_encoding(self.source, self.encoding) is False):
            # TODO: re-open, warn or raise error?
            raise UnicodeError('Encoding clash: encoding given is "%s" '
                               'but source is opened with encoding "%s".' %
                               (self.encoding, self.source.encoding))
        if not source_path:
            # Fall back to the name of the source object, if it has one.
            try:
                self.source_path = self.source.name
            except AttributeError:
                pass
Beispiel #20
0
    def run(self):
        """Include a file as part of the content of this reST file.

        Depending on the options, the (possibly trimmed) file content is
        returned as a literal block, handed to a `CodeBlock` directive, or
        inserted into the state machine's input (an empty list is returned
        in that case).

        Raises a "warning" system message when file insertion is disabled,
        a "severe" one for path, decoding or text-matching problems, and
        an "error" for a non-integer ``number-lines`` value.
        """
        if not self.state.document.settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)
        # Relative include paths are resolved against the directory of
        # the source that contains the directive.
        source = self.state_machine.input_lines.source(
            self.lineno - self.state_machine.input_offset - 1)
        source_dir = os.path.dirname(os.path.abspath(source))
        path = directives.path(self.arguments[0])
        # "<name>" refers to a file in the standard include directory.
        if path.startswith('<') and path.endswith('>'):
            path = os.path.join(self.standard_include_path, path[1:-1])
        path = os.path.normpath(os.path.join(source_dir, path))
        path = utils.relative_path(None, path)
        path = nodes.reprunicode(path)
        # Directive options take precedence over the document settings.
        encoding = self.options.get(
            'encoding', self.state.document.settings.input_encoding)
        tab_width = self.options.get(
            'tab-width', self.state.document.settings.tab_width)
        try:
            self.state.document.settings.record_dependencies.add(path)
            include_file = io.FileInput(
                source_path=path, encoding=encoding,
                error_handler=(self.state.document.settings.\
                               input_encoding_error_handler),
                handle_io_errors=None)
        except UnicodeEncodeError as error:
            raise self.severe('Problems with "%s" directive path:\n'
                              'Cannot encode input file path "%s" '
                              '(wrong locale?).' %
                              (self.name, SafeString(path)))
        except IOError as error:
            raise self.severe('Problems with "%s" directive path:\n%s.' %
                      (self.name, ErrorString(error)))
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        try:
            # Read line by line only when a line range was requested.
            if startline or (endline is not None):
                lines = include_file.readlines()
                rawtext = ''.join(lines[startline:endline])
            else:
                rawtext = include_file.read()
        except UnicodeError as error:
            raise self.severe('Problem with "%s" directive:\n%s' %
                              (self.name, ErrorString(error)))
        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        after_text = self.options.get('start-after', None)
        if after_text:
            # skip content in rawtext before *and incl.* a matching text
            after_index = rawtext.find(after_text)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of "%s" '
                                  'directive:\nText not found.' % self.name)
            rawtext = rawtext[after_index + len(after_text):]
        before_text = self.options.get('end-before', None)
        if before_text:
            # skip content in rawtext after *and incl.* a matching text
            before_index = rawtext.find(before_text)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of "%s" '
                                  'directive:\nText not found.' % self.name)
            rawtext = rawtext[:before_index]

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)
        if 'literal' in self.options:
            # Convert tabs to spaces, if `tab_width` is positive.
            if tab_width >= 0:
                text = rawtext.expandtabs(tab_width)
            else:
                text = rawtext
            literal_block = nodes.literal_block(rawtext, source=path,
                                    classes=self.options.get('class', []))
            literal_block.line = 1
            self.add_name(literal_block)
            if 'number-lines' in self.options:
                # An empty option value means numbering starts at 1.
                try:
                    startline = int(self.options['number-lines'] or 1)
                except ValueError:
                    raise self.error(':number-lines: with non-integer '
                                     'start value')
                endline = startline + len(include_lines)
                # Drop a single trailing newline before numbering.
                if text.endswith('\n'):
                    text = text[:-1]
                tokens = NumberLines([([], text)], startline, endline)
                for classes, value in tokens:
                    if classes:
                        literal_block += nodes.inline(value, value,
                                                      classes=classes)
                    else:
                        literal_block += nodes.Text(value, value)
            else:
                literal_block += nodes.Text(text, text)
            return [literal_block]
        if 'code' in self.options:
            # Delegate to the CodeBlock directive, passing the included
            # lines as its content and the 'code' value as its argument.
            self.options['source'] = path
            codeblock = CodeBlock(self.name,
                                  [self.options.pop('code')], # arguments
                                  self.options,
                                  include_lines, # content
                                  self.lineno,
                                  self.content_offset,
                                  self.block_text,
                                  self.state,
                                  self.state_machine)
            return codeblock.run()
        # Default: feed the included lines to the state machine's input.
        self.state_machine.insert_input(include_lines, path)
        return []
Beispiel #21
0
 def run(self):
     """
     Build a raw node from the directive content, a file, or a URL.

     Returns a one-element list holding the `nodes.raw` node.  Raises a
     "warning" system message when the directive is disabled by the
     settings, an "error" for conflicting content/options, and a "severe"
     one when the file or URL cannot be read or decoded.
     """
     if (not self.state.document.settings.raw_enabled
         or (not self.state.document.settings.file_insertion_enabled
             and ('file' in self.options
                  or 'url' in self.options))):
         raise self.warning('"%s" directive disabled.' % self.name)
     # Normalize the format argument: lower case, single spaces.
     attributes = {'format': ' '.join(self.arguments[0].lower().split())}
     encoding = self.options.get(
         'encoding', self.state.document.settings.input_encoding)
     if self.content:
         if 'file' in self.options or 'url' in self.options:
             raise self.error(
                 '"%s" directive may not both specify an external file '
                 'and have content.' % self.name)
         text = '\n'.join(self.content)
     elif 'file' in self.options:
         if 'url' in self.options:
             raise self.error(
                 'The "file" and "url" options may not be simultaneously '
                 'specified for the "%s" directive.' % self.name)
         # The file path is resolved against the current source's directory.
         source_dir = os.path.dirname(
             os.path.abspath(self.state.document.current_source))
         path = os.path.normpath(os.path.join(source_dir,
                                              self.options['file']))
         path = utils.relative_path(None, path)
         try:
             raw_file = io.FileInput(
                 source_path=path, encoding=encoding,
                 error_handler=(self.state.document.settings.\
                                input_encoding_error_handler),
                 handle_io_errors=None)
             # TODO: currently, raw input files are recorded as
             # dependencies even if not used for the chosen output format.
             self.state.document.settings.record_dependencies.add(path)
         except IOError as error:
             raise self.severe('Problems with "%s" directive path:\n%s.'
                               % (self.name, ErrorString(error)))
         try:
             text = raw_file.read()
         except UnicodeError as error:
             raise self.severe('Problem with "%s" directive:\n%s'
                 % (self.name, ErrorString(error)))
         attributes['source'] = path
     elif 'url' in self.options:
         source = self.options['url']
         # Do not import urllib2 at the top of the module because
         # it may fail due to broken SSL dependencies, and it takes
         # about 0.15 seconds to load.
         import urllib.request, urllib.error, urllib.parse
         try:
             # The "with" block closes the response (and its connection)
             # as soon as the body has been read.
             with urllib.request.urlopen(source) as response:
                 raw_text = response.read()
         except (urllib.error.URLError, IOError, OSError) as error:
             raise self.severe('Problems with "%s" directive URL "%s":\n%s.'
                 % (self.name, self.options['url'], ErrorString(error)))
         raw_file = io.StringInput(
             source=raw_text, source_path=source, encoding=encoding,
             error_handler=(self.state.document.settings.\
                            input_encoding_error_handler))
         try:
             text = raw_file.read()
         except UnicodeError as error:
             raise self.severe('Problem with "%s" directive:\n%s'
                               % (self.name, ErrorString(error)))
         attributes['source'] = source
     else:
         # This will always fail because there is no content.
         self.assert_has_content()
     raw_node = nodes.raw('', text, **attributes)
     (raw_node.source,
     raw_node.line) = self.state_machine.get_source_and_line(self.lineno)
     return [raw_node]
Beispiel #22
0
class Input(TransformSpec):
    """
    Abstract base class for input wrappers.

    Stores the source object, a textual reference to it, and the decoding
    parameters.  Subclasses must override `read()`; `decode()` is a shared
    helper that converts raw data into a unicode string.
    """

    component_type = 'input'

    default_source_path = None

    def __init__(self,
                 source=None,
                 source_path=None,
                 encoding=None,
                 error_handler='strict'):
        """
        Record the input source and how its data should be decoded.

        :Parameters:
            - `source`: the object the input data comes from.
            - `source_path`: a text reference to the source; when false,
              the class attribute `default_source_path` is used instead.
            - `encoding`: text encoding of the source; the special value
              "unicode" (any case) declares the data is already decoded.
            - `error_handler`: decoding error handler passed to the codec.
        """
        self.encoding = encoding
        """Text encoding for the input source."""

        self.error_handler = error_handler
        """Text decoding error handler."""

        self.source = source
        """The source of input data."""

        self.source_path = source_path
        """A text reference to the source."""

        if not source_path:
            self.source_path = self.default_source_path

        self.successful_encoding = None
        """The encoding that successfully decoded the source data."""

    def __repr__(self):
        return '%s: source=%r, source_path=%r' % (self.__class__, self.source,
                                                  self.source_path)

    def read(self):
        """Return the input data.  Must be overridden by subclasses."""
        raise NotImplementedError

    def decode(self, data):
        """
        Decode a string, `data`, heuristically.
        Raise UnicodeError if unsuccessful.

        Candidate encodings, in order of precedence:

        1. `self.encoding`, if set (it is trusted and tried alone);
        2. the encoding reported by `self.determine_encoding_from_data()`
           (not defined in this part of the class; it must be available
           on the instance);
        3. 'utf-8', the module-level `locale_encoding` (if true), 'latin-1'.

        On success `self.successful_encoding` is set to the encoding used
        and all U+FEFF characters (BOMs) are removed from the result.
        Unicode input is returned unchanged.

        The client application should call ``locale.setlocale`` at the
        beginning of processing::

            locale.setlocale(locale.LC_ALL, '')
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            assert isinstance(data,
                              unicode), ('input encoding is "unicode" '
                                         'but input is not a unicode object')
        if isinstance(data, unicode):
            # Accept unicode even if self.encoding != 'unicode'.
            return data
        if self.encoding:
            # We believe the user/application when the encoding is
            # explicitly given.
            encodings = [self.encoding]
        else:
            data_encoding = self.determine_encoding_from_data(data)
            if data_encoding:
                # If the data declares its encoding (explicitly or via a BOM),
                # we believe it.
                encodings = [data_encoding]
            else:
                # Apply heuristics only if no encoding is explicitly given and
                # no BOM found.  Start with UTF-8, because that only matches
                # data that *IS* UTF-8:
                encodings = ['utf-8', 'latin-1']
                if locale_encoding:
                    encodings.insert(1, locale_encoding)
        for enc in encodings:
            try:
                decoded = unicode(data, enc, self.error_handler)
            except (UnicodeError, LookupError) as err:
                # Keep a reference for the final error message: in Python 3
                # the name bound by "as" is deleted when the clause ends.
                error = err
            else:
                self.successful_encoding = enc
                # Return decoded, removing BOMs.
                return decoded.replace(u'\ufeff', u'')
        # `encodings` is never empty, so `error` is always bound here.
        raise UnicodeError(
            'Unable to decode input data.  Tried the following encodings: '
            '%s.\n(%s)' %
            (', '.join([repr(enc) for enc in encodings]), ErrorString(error)))
Beispiel #23
0
     except UnicodeError, error:
         raise self.severe(u'Problem with "%s" directive:\n%s' %
                           (self.name, ErrorString(error)))
     attributes['source'] = path
 elif 'url' in self.options:
     source = self.options['url']
     # Do not import urllib2 at the top of the module because
     # it may fail due to broken SSL dependencies, and it takes
     # about 0.15 seconds to load.
     import urllib2
     try:
         raw_text = urllib2.urlopen(source).read()
     except (urllib2.URLError, IOError, OSError), error:
         raise self.severe(
             u'Problems with "%s" directive URL "%s":\n%s.' %
             (self.name, self.options['url'], ErrorString(error)))
     raw_file = io.StringInput(
         source=raw_text, source_path=source, encoding=encoding,
         error_handler=(self.state.document.settings.\
                        input_encoding_error_handler))
     try:
         text = raw_file.read()
     except UnicodeError, error:
         raise self.severe(u'Problem with "%s" directive:\n%s' %
                           (self.name, ErrorString(error)))
     attributes['source'] = source
 else:
     # This will always fail because there is no content.
     self.assert_has_content()
 raw_node = nodes.raw('', text, **attributes)
 return [raw_node]