Beispiel #1
0
 def __init__(self, *commands, **kwargs):
     """Normalize the commands, split off the call options
     and (by default) execute right away.

     Each positional argument is one command: a string is split
     using shlex.split(), anything else is converted using list().
     Commands that turn out to be empty are dropped.
     A ValueError is raised for a command that cannot be converted
     to a list, or if no command remains.
     """
     # Remember the untouched arguments (for the .repeat() method)
     self.__repeatable = namespaces.Namespace(commands=commands,
                                              kwargs=kwargs.copy())
     self.commands = []
     for raw_command in commands:
         if isinstance(raw_command, str):
             command_parts = shlex.split(raw_command)
         else:
             try:
                 command_parts = list(raw_command)
             except TypeError as type_error:
                 raise ValueError('Invalid command: {0!r}'.format(
                     raw_command)) from type_error
         if command_parts:
             self.commands.append(command_parts)
     if not self.commands:
         raise ValueError('Please provide at least one command.')
     # Options consumed here; everything left over in kwargs
     # ends up in self.process_arguments
     call_options = dict(
         check=kwargs.pop('check', False),
         input=kwargs.pop('input', None),
         intermediate_stderr=kwargs.pop('intermediate_stderr', None),
         timeout=kwargs.pop('timeout', None))
     run_now = kwargs.pop('execute_immediately', True)
     requested_stderr = call_options['intermediate_stderr']
     if requested_stderr not in self.supported_intermediate_stderr:
         warnings.warn(
             'Supported values for intermediate_stderr:'
             ' None, DEVNULL or STDOUT. {0!r} has been ignored and'
             ' substituted by None.'.format(requested_stderr))
         call_options['intermediate_stderr'] = None
     self.call_arguments = namespaces.Namespace(**call_options)
     # stdin is always overridden: a pipe for truthy input, else None
     kwargs['stdin'] = PIPE if call_options['input'] else None
     self.current_state = self.states.ready
     self.process_arguments = dict(self.defaults, **kwargs)
     self.result = None
     if run_now:
         self.execute()
Beispiel #2
0
 def reset(self):
     """Discard everything collected so far, then reset the base class"""
     fresh_state = dict(content_list=[],
                        images=[],
                        in_body=False,
                        closed=False)
     self.__variables = namespaces.Namespace(**fresh_state)
     super(HtmlTagStripper, self).reset()
Beispiel #3
0
 def __init__(self,
              image_placeholders='with alt text only',
              body_reqired=True):
     """Set up the instance variables and initialize the base class.

     image_placeholders: a false value blanks the placeholder
         with alt text on this instance; the value
         'with alt text only' blanks the empty placeholder instead;
         any other value leaves both class attributes in effect.
     body_reqired: stored negated in a private attribute.
     """
     # Instance attributes set here hide the class-level
     # image placeholder strings
     if not image_placeholders:
         self.image_placeholder_with_alt_text = ''
     elif image_placeholders == 'with alt text only':
         self.image_placeholder_empty = ''
     self.__body_not_reqired = not body_reqired
     # Precompile both whitespace patterns once per instance
     multiple_space_prx = re.compile(self.re_multiple_space)
     newline_and_whitespace_prx = re.compile(
         self.re_newline_and_whitespace, re.DOTALL)
     self.__prx = namespaces.Namespace(
         multiple_space=multiple_space_prx,
         newline_and_whitespace=newline_and_whitespace_prx)
     self.__variables = namespaces.Namespace()
     super(HtmlTagStripper, self).__init__(convert_charrefs=True)
Beispiel #4
0
 def get_snapshot(self):
     """Return a Namespace holding the content collected so far
     (attribute content) and a copy of the list of collected
     images (attribute images)
     """
     patterns = self.__prx
     joined = constants.EMPTY.join(self.__variables.content_list)
     # Replace each match of the multiple_space pattern ...
     squeezed = patterns.multiple_space.sub(constants.BLANK, joined)
     # ... then each match of the newline_and_whitespace pattern
     normalized = patterns.newline_and_whitespace.sub(
         constants.NEWLINE, squeezed)
     return namespaces.Namespace(
         content=normalized.strip(),
         images=list(self.__variables.images))
Beispiel #5
0
 def handle_starttag(self, tag, attrs):
     """Process a start tag: add whitespace for it, note when the
     body starts, and record images together with a placeholder
     """
     self.__add_whitespace(tag)
     if tag == 'body':
         self.__variables.in_body = True
         return
     #
     if tag != 'img':
         return
     #
     # Keep the attributes of the image for later use
     image = namespaces.Namespace(attrs)
     self.__variables.images.append(image)
     try:
         self.__add_content(
             self.image_placeholder_with_alt_text.format(image))
     except AttributeError:
         # Fall back to the placeholder that needs no attributes
         self.__add_content(self.image_placeholder_empty)
Beispiel #6
0
 def test_namespace(self):
     """Check attribute access, dir(), repr(), deletion and
     assignment on a simple Namespace
     """
     flowers = namespaces.Namespace(roses='red', violets='blue')
     self.assertEqual(dir(flowers), ['roses', 'violets'])
     expected_repr = "Namespace({'roses': 'red', 'violets': 'blue'})"
     self.assertEqual(repr(flowers), expected_repr)
     self.assertEqual(flowers.roses, 'red')
     self.assertEqual(flowers.violets, 'blue')
     # An attribute that was never set must not be readable
     with self.assertRaises(AttributeError):
         getattr(flowers, 'sugar')
     #
     del flowers.roses
     flowers.sugar = 'sweet'
     self.assertEqual(dir(flowers), ['sugar', 'violets'])
     expected_items = [('sugar', 'sweet'), ('violets', 'blue')]
     self.assertEqual(sorted(flowers.items()), expected_items)
Beispiel #7
0
 def _execution_implementation(self):
     """Run the commands one after another via subprocess.run(),
     handing the stdout of each command to its successor as input,
     and store the result of the last command in self.result
     """
     self.all_results.clear()
     final_index = len(self.commands) - 1
     for position, command in enumerate(self.commands):
         options = namespaces.Namespace(self.process_arguments)
         # Only the first command gets the input given by the caller
         if position == 0:
             data = self.call_arguments.input
         else:
             data = self.all_results[position - 1].stdout
         #
         # Every command except the last one must expose its stdout
         if position < final_index:
             options.stdout = PIPE
             options.stderr = self.call_arguments.intermediate_stderr
         #
         completed = subprocess.run(
             command,
             input=data,
             check=self.call_arguments.check,
             timeout=self.call_arguments.timeout,
             bufsize=options.bufsize,
             executable=options.executable,
             stdout=options.stdout,
             stderr=options.stderr,
             close_fds=options.close_fds,
             shell=options.shell,
             cwd=options.cwd,
             env=options.env,
             universal_newlines=options.universal_newlines,
             startupinfo=options.startupinfo,
             creationflags=options.creationflags,
             restore_signals=options.restore_signals,
             start_new_session=options.start_new_session,
             pass_fds=options.pass_fds,
             encoding=options.encoding,
             errors=options.errors)
         self.all_results.append(completed)
     #
     self.current_state = self.states.finished
     self.result = self.all_results[final_index]
Beispiel #8
0
 def test_enhanced_namespace(self):
     """Check the EnhancedNamespace constructors from_object(),
     from_mapping() and from_sequence(), each with and without
     a restricting names argument
     """
     factory = namespaces.EnhancedNamespace
     source_object = namespaces.Namespace(roses='red',
                                          violets='blue',
                                          sugar='sweet')
     source_mapping = {'a': 1, 'b': 3, 'x': 27}
     source_pairs = [('abc', 'xxx'), ('def', 'yyy'),
                     ('ghi', 222), ('jkl', None)]
     # from_object(): all attributes, then a selection
     from_object_all = factory.from_object(source_object)
     self.assertEqual(
         sorted(from_object_all.items()),
         [('roses', 'red'), ('sugar', 'sweet'), ('violets', 'blue')])
     from_object_some = factory.from_object(
         source_object, names=('roses', 'violets'))
     self.assertEqual(
         sorted(from_object_some.items()),
         [('roses', 'red'), ('violets', 'blue')])
     # from_mapping(): a selection, then all keys
     # (a fresh dict is handed over on each call)
     from_mapping_some = factory.from_mapping(
         dict(source_mapping), names=('x', 'a'))
     self.assertEqual(
         sorted(from_mapping_some.items()),
         [('a', 1), ('x', 27)])
     from_mapping_all = factory.from_mapping(dict(source_mapping))
     self.assertEqual(
         sorted(from_mapping_all.items()),
         [('a', 1), ('b', 3), ('x', 27)])
     # from_sequence(): a selection, then all pairs
     # (a fresh list is handed over on each call)
     from_sequence_some = factory.from_sequence(
         list(source_pairs), names=('def', 'jkl'))
     self.assertEqual(
         sorted(from_sequence_some.items()),
         [('def', 'yyy'), ('jkl', None)])
     from_sequence_all = factory.from_sequence(list(source_pairs))
     self.assertEqual(
         sorted(from_sequence_all.items()),
         [('abc', 'xxx'), ('def', 'yyy'), ('ghi', 222), ('jkl', None)])
Beispiel #9
0
class _AbstractPipeline():
    """Wrapper for a subprocess.Popen() object
    also storing the result

    Supports keyword arguments for the subprocess.Popen() objects as defined in
    https://docs.python.org/3.6/library/subprocess.html#popen-constructor
    with the exception of the deprecated preexec_fn argument.
    Default values are the same as documented there, except stderr and stdout
    (both defaulting to subprocess.PIPE).
    A stdin keyword argument is always overridden: stdin is set to PIPE
    if a non-empty input was provided, and to None otherwise.

    Additional keyword arguments:
        check (default: False)
        execute_immediately (default: True)
        intermediate_stderr (default: None)
        input (default: None)
        timeout (default: None)
    """

    # Defaults for the keyword arguments handed to the subprocess(es)
    defaults = dict(bufsize=-1,
                    executable=None,
                    stdin=None,
                    stdout=PIPE,
                    stderr=PIPE,
                    close_fds=True,
                    shell=False,
                    cwd=None,
                    env=None,
                    universal_newlines=False,
                    startupinfo=None,
                    creationflags=0,
                    restore_signals=True,
                    start_new_session=False,
                    pass_fds=(),
                    encoding=None,
                    errors=None)
    # Life cycle of an instance: ready -> running -> finished
    states = namespaces.Namespace(ready=0, running=1, finished=2)
    supported_intermediate_stderr = (None, DEVNULL, STDOUT)

    def __init__(self, *commands, **kwargs):
        """Prepare subprocess(es)

        Each positional argument is one command: a string is split
        using shlex.split(), anything else is converted using list().
        Empty commands are dropped.
        The keyword arguments check, execute_immediately, input,
        intermediate_stderr and timeout are consumed here, all others
        are merged over the class defaults into self.process_arguments.
        An unsupported intermediate_stderr value is replaced by None,
        and a warning is issued.
        Unless execute_immediately is false, .execute() is called
        at the end.

        Raises a ValueError for a command that cannot be converted
        to a list, or if no non-empty command was provided.
        """
        # Store arguments for the .repeat() method
        self.__repeatable = namespaces.Namespace(commands=commands,
                                                 kwargs=kwargs.copy())
        # Build the actual commands list from the provided
        # (non-keyword) arguments
        self.commands = []
        for single_command in commands:
            if isinstance(single_command, str):
                appendable_command = shlex.split(single_command)
            else:
                try:
                    appendable_command = list(single_command)
                except TypeError as type_error:
                    raise ValueError('Invalid command: {0!r}'.format(
                        single_command)) from type_error
                #
            #
            if appendable_command:
                self.commands.append(appendable_command)
            #
        #
        if not self.commands:
            raise ValueError('Please provide at least one command.')
        #
        check = kwargs.pop('check', False)
        execute_immediately = kwargs.pop('execute_immediately', True)
        input_ = kwargs.pop('input', None)
        intermediate_stderr = kwargs.pop('intermediate_stderr', None)
        if intermediate_stderr not in self.supported_intermediate_stderr:
            warnings.warn(
                'Supported values for intermediate_stderr:'
                ' None, DEVNULL or STDOUT. {0!r} has been ignored and'
                ' substituted by None.'.format(intermediate_stderr))
            intermediate_stderr = None
        #
        timeout = kwargs.pop('timeout', None)
        self.call_arguments = namespaces.Namespace(
            check=check,
            input=input_,
            intermediate_stderr=intermediate_stderr,
            timeout=timeout)
        #
        # stdin is derived from the input alone,
        # a value provided by the caller is overridden
        if input_:
            kwargs['stdin'] = PIPE
        else:
            kwargs['stdin'] = None
        #
        self.current_state = self.states.ready
        self.process_arguments = dict(self.defaults)
        self.process_arguments.update(kwargs)
        self.result = None
        if execute_immediately:
            self.execute()
        #

    def repeat(self):
        """Create an instance with the same parameters as the current one"""
        return self.__class__(*self.__repeatable.commands,
                              **self.__repeatable.kwargs)

    def _execution_implementation(self):
        """Override this method in child classes with the implementation
        of the pipeline call: Start the subprocess(es) and set the result
        """
        raise NotImplementedError

    def execute(self, **kwargs):
        """Check if self.current_state is ready and set it to running,
        or raise an IllegalStateException.
        Update self.call_arguments from the keyword arguments
        check, input, and timeout (each if provided);
        other keyword arguments are ignored.
        Execute the concrete implementation of the pipeline call
        (the ._execution_implementation() method).
        If the implementation returns or raises while
        self.current_state is still running, it is set to finished.
        """
        if self.current_state != self.states.ready:
            raise IllegalStateException('Please create a new instance'
                                        ' using the .repeat() method!')
        #
        self.current_state = self.states.running
        for item in ('check', 'input', 'timeout'):
            try:
                self.call_arguments[item] = kwargs[item]
            except KeyError:
                continue
            #
        #
        try:
            self._execution_implementation()
        finally:
            # Nothing is running anymore at this point, even if
            # the implementation raised before updating the state
            if self.current_state == self.states.running:
                self.current_state = self.states.finished
            #
        #

    @classmethod
    def run(cls, *commands, **kwargs):
        """Create an instance, run it immediately and return its result"""
        kwargs['execute_immediately'] = True
        pipeline = cls(*commands, **kwargs)
        return pipeline.result
Beispiel #10
0
 def _execution_implementation(self):
     """Start the subprocess(es) and set the result

     All commands are started using subprocess.Popen(), each one
     reading from the stdout of its predecessor. Only the last process
     is communicated with; its outcome is stored in self.result
     as a subprocess.CompletedProcess instance.
     If there is more than one command, the input is ignored
     (with a warning if it was not None) and stdin is reset to None.

     self.current_state is set to finished in any case, and all
     processes before the last one are waited for. They are killed
     first if a command could not be started or if communicating
     with the last process failed (e.g. because the timeout expired).

     Raises:
         OSError, ValueError: propagated from subprocess.Popen()
         subprocess.TimeoutExpired: the last process did not terminate
             within the timeout (it is killed in that case)
         subprocess.CalledProcessError: check is set and
             the last process returned a non-zero code
     """
     processes = []
     last_command_index = len(self.commands) - 1
     if last_command_index > 0:
         # We communicate() only with the last process in the pipeline.
         # If there is more than one process, input is ignored,
         # and a warning is issued.
         if self.call_arguments.input is not None:
             warnings.warn(
                 'Input {0.call_arguments.input!r} has been ignored.'
                 ' Use the ProcessChain class to avoid this.'.format(self))
         self.call_arguments.input = None
         self.process_arguments['stdin'] = None
     #
     # Until the last process has terminated regularly, leaving this
     # method means failure: the other processes might never end
     # on their own, so they are killed before being waited for.
     kill_before_wait = True
     try:
         for current_index, current_command in enumerate(self.commands):
             current_arguments = namespaces.Namespace(
                 self.process_arguments)
             if current_index > 0:
                 current_arguments.stdin = \
                     processes[current_index - 1].stdout
             #
             if current_index < last_command_index:
                 current_arguments.stdout = PIPE
                 current_arguments.stderr = \
                     self.call_arguments.intermediate_stderr
             #
             processes.append(
                 subprocess.Popen(
                     current_command,
                     bufsize=current_arguments.bufsize,
                     executable=current_arguments.executable,
                     stdin=current_arguments.stdin,
                     stdout=current_arguments.stdout,
                     stderr=current_arguments.stderr,
                     close_fds=current_arguments.close_fds,
                     shell=current_arguments.shell,
                     cwd=current_arguments.cwd,
                     env=current_arguments.env,
                     universal_newlines=current_arguments.universal_newlines,
                     startupinfo=current_arguments.startupinfo,
                     creationflags=current_arguments.creationflags,
                     restore_signals=current_arguments.restore_signals,
                     start_new_session=current_arguments.start_new_session,
                     pass_fds=current_arguments.pass_fds,
                     encoding=current_arguments.encoding,
                     errors=current_arguments.errors))
         #
         # Close stdout to allow processes to receive SIGPIPE.
         for upstream_process in processes[:last_command_index]:
             upstream_process.stdout.close()
         #
         # Communicate with the last process in the pipeline.
         # Mimick subprocess.run() behaviour as in
         # https://github.com/python/cpython/blob/3.6/Lib/subprocess.py#L424
         last_process = processes[last_command_index]
         try:
             stdout, stderr = last_process.communicate(
                 input=self.call_arguments.input,
                 timeout=self.call_arguments.timeout)
         except subprocess.TimeoutExpired as timeout_expired:
             last_process.kill()
             stdout, stderr = last_process.communicate()
             raise subprocess.TimeoutExpired(
                 last_process.args,
                 self.call_arguments.timeout,
                 output=stdout,
                 stderr=stderr) from timeout_expired
         #
         # From here on, the other processes are only waited for,
         # regardless of the returncode of the last process
         kill_before_wait = False
         returncode = last_process.poll()
         if self.call_arguments.check and returncode:
             raise subprocess.CalledProcessError(returncode,
                                                 last_process.args,
                                                 output=stdout,
                                                 stderr=stderr)
         #
         self.result = subprocess.CompletedProcess(last_process.args,
                                                   returncode,
                                                   stdout=stdout,
                                                   stderr=stderr)
     finally:
         self.current_state = self.states.finished
         # processes cleanup; avoid ResourceWarnings.
         # If a command could not be started, the slice covers
         # all processes that had been started before.
         for upstream_process in processes[:last_command_index]:
             if kill_before_wait:
                 # stdout is always a pipe here; closing it twice
                 # does no harm
                 upstream_process.stdout.close()
                 upstream_process.kill()
             #
             upstream_process.wait()
         #
     #