def __init__(self, *commands, **kwargs):
    """Prepare subprocess(es)

    Each positional argument is one command: either a string
    (split using shlex.split()) or an iterable of arguments.
    Commands that turn out to be empty are skipped.

    Keyword arguments consumed here:
        check (default: False)
        execute_immediately (default: True)
        input (default: None)
        intermediate_stderr (default: None)
        timeout (default: None)
    All remaining keyword arguments are merged over self.defaults
    into self.process_arguments. The stdin item is always set here,
    depending on whether input was provided.

    Raises a ValueError if a command is neither a string nor iterable,
    or if no non-empty command remains.
    """
    # Store arguments for the .repeat() method
    self.__repeatable = namespaces.Namespace(commands=commands,
                                             kwargs=kwargs.copy())
    # Build the actual commands list from the provided
    # (non-keyword) arguments
    self.commands = []
    for single_command in commands:
        if isinstance(single_command, str):
            appendable_command = shlex.split(single_command)
        else:
            try:
                appendable_command = list(single_command)
            except TypeError as type_error:
                raise ValueError('Invalid command: {0!r}'.format(
                    single_command)) from type_error
            #
        #
        if appendable_command:
            self.commands.append(appendable_command)
        #
    #
    if not self.commands:
        raise ValueError('Please provide at least one command.')
    #
    check = kwargs.pop('check', False)
    execute_immediately = kwargs.pop('execute_immediately', True)
    input_ = kwargs.pop('input', None)
    intermediate_stderr = kwargs.pop('intermediate_stderr', None)
    if intermediate_stderr not in self.supported_intermediate_stderr:
        warnings.warn(
            'Supported values for intermediate_stderr:'
            ' None, DEVNULL or STDOUT. {0!r} has been ignored and'
            ' substituted by None.'.format(intermediate_stderr))
        intermediate_stderr = None
    #
    timeout = kwargs.pop('timeout', None)
    self.call_arguments = namespaces.Namespace(
        check=check,
        input=input_,
        intermediate_stderr=intermediate_stderr,
        timeout=timeout)
    # Like subprocess.run(), open a pipe for stdin whenever input was
    # given at all: empty input (b'' or '') must still reach the child
    # as an immediate EOF instead of letting it read the parent's stdin.
    if input_ is not None:
        kwargs['stdin'] = PIPE
    else:
        kwargs['stdin'] = None
    #
    self.current_state = self.states.ready
    self.process_arguments = dict(self.defaults)
    self.process_arguments.update(kwargs)
    self.result = None
    if execute_immediately:
        self.execute()
    #
def reset(self):
    """Reset the parser

    Replace the collected state by a fresh Namespace (no content,
    no images, in_body and closed both False) and call the
    reset() method of the base class.
    """
    initial_state = dict(content_list=[],
                         images=[],
                         in_body=False,
                         closed=False)
    self.__variables = namespaces.Namespace(**initial_state)
    super(HtmlTagStripper, self).reset()
def __init__(self,
             image_placeholders='with alt text only',
             body_reqired=True):
    """Set up the instance variables, then initialize the base class

    image_placeholders: a false value blanks the placeholder used
        for images with alt text, 'with alt text only' blanks the
        placeholder used for images without alt text, any other
        value keeps both class-level placeholders.
    body_reqired: stored negated in a private attribute.
    """
    # Shadow the class attributes for the image placeholder strings
    if not image_placeholders:
        self.image_placeholder_with_alt_text = ''
    elif image_placeholders == 'with alt text only':
        self.image_placeholder_empty = ''
    #
    self.__body_not_reqired = not body_reqired
    # Precompile the regular expressions defined on the class
    compiled_multiple_space = re.compile(self.re_multiple_space)
    compiled_newline_and_whitespace = re.compile(
        self.re_newline_and_whitespace, re.DOTALL)
    self.__prx = namespaces.Namespace(
        multiple_space=compiled_multiple_space,
        newline_and_whitespace=compiled_newline_and_whitespace)
    self.__variables = namespaces.Namespace()
    super(HtmlTagStripper, self).__init__(convert_charrefs=True)
def get_snapshot(self):
    """Return a Namespace holding the content collected so far
    (whitespace normalized and stripped) and a copy of the list
    of collected images
    """
    joined_content = constants.EMPTY.join(self.__variables.content_list)
    # First pass: apply the multiple_space substitution
    squeezed_content = self.__prx.multiple_space.sub(
        constants.BLANK, joined_content)
    # Second pass: apply the newline_and_whitespace substitution
    normalized_content = self.__prx.newline_and_whitespace.sub(
        constants.NEWLINE, squeezed_content)
    return namespaces.Namespace(
        content=normalized_content.strip(),
        images=list(self.__variables.images))
def handle_starttag(self, tag, attrs):
    """Handle a start tag

    Whitespace handling is delegated to __add_whitespace() for every
    tag. A body tag switches the in_body flag on. For an img tag,
    the attributes are stored as a Namespace in the images list,
    and a placeholder is added to the content.
    """
    self.__add_whitespace(tag)
    if tag == 'body':
        self.__variables.in_body = True
    elif tag == 'img':
        # save images' attributes
        current_image = namespaces.Namespace(attrs)
        self.__variables.images.append(current_image)
        # Only the formatting step is guarded: an AttributeError raised
        # there selects the empty placeholder (presumably the template
        # refers to an attribute the tag lacks, e.g. alt - to be
        # confirmed against the class attributes). Errors raised while
        # adding the content are no longer swallowed.
        try:
            placeholder = self.image_placeholder_with_alt_text.format(
                current_image)
        except AttributeError:
            placeholder = self.image_placeholder_empty
        #
        self.__add_content(placeholder)
    #
def test_namespace(self):
    """Check attribute listing, repr, attribute access, deletion
    and assignment on a simple Namespace
    """
    flowers = namespaces.Namespace(roses='red', violets='blue')
    self.assertEqual(dir(flowers), ['roses', 'violets'])
    self.assertEqual(repr(flowers),
                     "Namespace({'roses': 'red', 'violets': 'blue'})")
    for name, expected_colour in (('roses', 'red'), ('violets', 'blue')):
        self.assertEqual(getattr(flowers, name), expected_colour)
    #
    # A name that has not been set must not be resolvable
    with self.assertRaises(AttributeError):
        getattr(flowers, 'sugar')
    #
    del flowers.roses
    flowers.sugar = 'sweet'
    self.assertEqual(dir(flowers), ['sugar', 'violets'])
    expected_items = [('sugar', 'sweet'), ('violets', 'blue')]
    self.assertEqual(sorted(flowers.items()), expected_items)
def _execution_implementation(self):
    """Start the subprocess(es) and set the result

    The commands are executed one after another using
    subprocess.run(). The first command receives the input from
    self.call_arguments, every following command receives the
    captured stdout of its predecessor. All results are collected
    in self.all_results, and the result of the last command is
    stored in self.result.

    Exceptions raised by subprocess.run() are propagated unchanged;
    self.result is not set in that case, but the state is set
    to finished anyway.
    """
    self.all_results.clear()
    last_command_index = len(self.commands) - 1
    try:
        for current_index, current_command in enumerate(self.commands):
            current_arguments = namespaces.Namespace(
                self.process_arguments)
            if current_index > 0:
                current_input = self.all_results[current_index - 1].stdout
            else:
                current_input = self.call_arguments.input
            #
            if current_index < last_command_index:
                # Intermediate output must be captured
                # to feed the next command
                current_arguments.stdout = PIPE
                current_arguments.stderr = \
                    self.call_arguments.intermediate_stderr
            #
            self.all_results.append(
                subprocess.run(
                    current_command,
                    input=current_input,
                    check=self.call_arguments.check,
                    timeout=self.call_arguments.timeout,
                    bufsize=current_arguments.bufsize,
                    executable=current_arguments.executable,
                    stdout=current_arguments.stdout,
                    stderr=current_arguments.stderr,
                    close_fds=current_arguments.close_fds,
                    shell=current_arguments.shell,
                    cwd=current_arguments.cwd,
                    env=current_arguments.env,
                    universal_newlines=current_arguments.universal_newlines,
                    startupinfo=current_arguments.startupinfo,
                    creationflags=current_arguments.creationflags,
                    restore_signals=current_arguments.restore_signals,
                    start_new_session=current_arguments.start_new_session,
                    pass_fds=current_arguments.pass_fds,
                    encoding=current_arguments.encoding,
                    errors=current_arguments.errors))
        #
    finally:
        # Never leave the instance in the running state, not even
        # if one of the commands failed, timed out or could not
        # be started at all.
        self.current_state = self.states.finished
    #
    self.result = self.all_results[last_command_index]
def test_enhanced_namespace(self):
    """Check the EnhancedNamespace constructors from_object(),
    from_mapping() and from_sequence(), each without and with
    a restricting names argument
    """
    factory = namespaces.EnhancedNamespace
    source_object = namespaces.Namespace(roses='red',
                                         violets='blue',
                                         sugar='sweet')
    source_pairs = [('abc', 'xxx'), ('def', 'yyy'),
                    ('ghi', 222), ('jkl', None)]
    #
    # from_object(): all attributes, then a subset
    from_object_all = factory.from_object(source_object)
    self.assertEqual(
        sorted(from_object_all.items()),
        [('roses', 'red'), ('sugar', 'sweet'), ('violets', 'blue')])
    from_object_subset = factory.from_object(source_object,
                                             names=('roses', 'violets'))
    self.assertEqual(sorted(from_object_subset.items()),
                     [('roses', 'red'), ('violets', 'blue')])
    #
    # from_mapping(): a subset, then all items
    from_mapping_subset = factory.from_mapping(dict(a=1, b=3, x=27),
                                               names=('x', 'a'))
    self.assertEqual(sorted(from_mapping_subset.items()),
                     [('a', 1), ('x', 27)])
    from_mapping_all = factory.from_mapping(dict(a=1, b=3, x=27))
    self.assertEqual(sorted(from_mapping_all.items()),
                     [('a', 1), ('b', 3), ('x', 27)])
    #
    # from_sequence(): a subset, then all items
    from_sequence_subset = factory.from_sequence(list(source_pairs),
                                                 names=('def', 'jkl'))
    self.assertEqual(sorted(from_sequence_subset.items()),
                     [('def', 'yyy'), ('jkl', None)])
    from_sequence_all = factory.from_sequence(list(source_pairs))
    self.assertEqual(
        sorted(from_sequence_all.items()),
        [('abc', 'xxx'), ('def', 'yyy'), ('ghi', 222), ('jkl', None)])
class _AbstractPipeline():

    """Wrapper for a subprocess.Popen() object
    also storing the result

    Supports keyword arguments for the subprocess.Popen() objects
    as defined in
    https://docs.python.org/3.6/library/subprocess.html#popen-constructor
    with the exception of the deprecated preexec_fn argument.
    Default values are the same as documented there,
    except stderr and stdout (both defaulting to subprocess.PIPE).
    The stdin argument is always determined from the input argument.

    Additional keyword arguments:
        check (default: False)
        execute_immediately (default: True)
        intermediate_stderr (default: None)
        input (default: None)
        timeout (default: None)
    """

    # Defaults for the subprocess arguments
    defaults = dict(bufsize=-1,
                    executable=None,
                    stdin=None,
                    stdout=PIPE,
                    stderr=PIPE,
                    close_fds=True,
                    shell=False,
                    cwd=None,
                    env=None,
                    universal_newlines=False,
                    startupinfo=None,
                    creationflags=0,
                    restore_signals=True,
                    start_new_session=False,
                    pass_fds=(),
                    encoding=None,
                    errors=None)
    # Lifecycle of an instance: ready -> running -> finished
    states = namespaces.Namespace(ready=0, running=1, finished=2)
    supported_intermediate_stderr = (None, DEVNULL, STDOUT)

    def __init__(self, *commands, **kwargs):
        """Prepare subprocess(es)

        Each positional argument is one command: either a string
        (split using shlex.split()) or an iterable of arguments.
        Commands that turn out to be empty are skipped.
        Raises a ValueError if a command is neither a string nor
        iterable, or if no non-empty command remains.
        """
        # Store arguments for the .repeat() method
        self.__repeatable = namespaces.Namespace(commands=commands,
                                                 kwargs=kwargs.copy())
        # Build the actual commands list from the provided
        # (non-keyword) arguments
        self.commands = []
        for single_command in commands:
            if isinstance(single_command, str):
                appendable_command = shlex.split(single_command)
            else:
                try:
                    appendable_command = list(single_command)
                except TypeError as type_error:
                    raise ValueError('Invalid command: {0!r}'.format(
                        single_command)) from type_error
                #
            #
            if appendable_command:
                self.commands.append(appendable_command)
            #
        #
        if not self.commands:
            raise ValueError('Please provide at least one command.')
        #
        check = kwargs.pop('check', False)
        execute_immediately = kwargs.pop('execute_immediately', True)
        input_ = kwargs.pop('input', None)
        intermediate_stderr = kwargs.pop('intermediate_stderr', None)
        if intermediate_stderr not in self.supported_intermediate_stderr:
            warnings.warn(
                'Supported values for intermediate_stderr:'
                ' None, DEVNULL or STDOUT. {0!r} has been ignored and'
                ' substituted by None.'.format(intermediate_stderr))
            intermediate_stderr = None
        #
        timeout = kwargs.pop('timeout', None)
        self.call_arguments = namespaces.Namespace(
            check=check,
            input=input_,
            intermediate_stderr=intermediate_stderr,
            timeout=timeout)
        # Like subprocess.run(), open a pipe for stdin whenever input
        # was given at all: empty input (b'' or '') must still reach
        # the child as an immediate EOF.
        if input_ is not None:
            kwargs['stdin'] = PIPE
        else:
            kwargs['stdin'] = None
        #
        self.current_state = self.states.ready
        self.process_arguments = dict(self.defaults)
        self.process_arguments.update(kwargs)
        self.result = None
        if execute_immediately:
            self.execute()
        #

    def repeat(self):
        """Create an instance with the same parameters
        as the current one
        """
        return self.__class__(*self.__repeatable.commands,
                              **self.__repeatable.kwargs)

    def _execution_implementation(self):
        """Override this method in child classes
        with the implementation of the pipeline call:
        Start the subprocess(es) and set the result
        """
        raise NotImplementedError

    def execute(self, **kwargs):
        """Check if self.current_state is ready,
        set self.current_state to running or raise an exception.
        Update self.call_arguments from the keyword arguments
        check, input, and timeout (each if provided).
        Execute the concrete implementation of the pipeline call
        (see the _execution_implementation() method).
        """
        if self.current_state != self.states.ready:
            raise IllegalStateException('Please create a new instance'
                                        ' using the .repeat() method!')
        #
        self.current_state = self.states.running
        for item in ('check', 'input', 'timeout'):
            if item in kwargs:
                self.call_arguments[item] = kwargs[item]
            #
        #
        if 'input' in kwargs:
            # Keep stdin in sync with the input provided here, exactly
            # as __init__() does; otherwise input given only at
            # execution time would never be sent to a process
            # started without a stdin pipe.
            if kwargs['input'] is not None:
                self.process_arguments['stdin'] = PIPE
            else:
                self.process_arguments['stdin'] = None
            #
        #
        self._execution_implementation()

    @classmethod
    def run(cls, *commands, **kwargs):
        """Create an instance, run it immediately
        and return its result
        """
        kwargs['execute_immediately'] = True
        pipeline = cls(*commands, **kwargs)
        return pipeline.result
def _execution_implementation(self):
    """Start the subprocess(es) and set the result

    All commands are started using subprocess.Popen(), the stdout
    of each process being connected to the stdin of the following
    one. Only the last process is communicated with, so input is
    ignored (with a warning) if there is more than one command.

    Raises subprocess.TimeoutExpired if the last process does not
    finish in time, and subprocess.CalledProcessError if check is set
    and the last process returned a non-zero code. Exceptions raised
    by subprocess.Popen() are propagated unchanged.
    In each case, the processes started before are cleaned up
    and the state is set to finished.
    """
    processes = []
    last_command_index = len(self.commands) - 1
    if last_command_index > 0:
        # We communicate() only with the last process in the pipeline.
        # If there is more than one process, input is ignored,
        # and a warning is issued.
        if self.call_arguments.input is not None:
            warnings.warn(
                'Input {0.call_arguments.input!r} has been ignored.'
                ' Use the ProcessChain class to avoid this.'.format(self))
            self.call_arguments.input = None
        #
        self.process_arguments['stdin'] = None
    #
    try:
        for current_index, current_command in enumerate(self.commands):
            current_arguments = namespaces.Namespace(
                self.process_arguments)
            if current_index > 0:
                current_arguments.stdin = \
                    processes[current_index - 1].stdout
            #
            if current_index < last_command_index:
                current_arguments.stdout = PIPE
                current_arguments.stderr = \
                    self.call_arguments.intermediate_stderr
            #
            processes.append(
                subprocess.Popen(
                    current_command,
                    bufsize=current_arguments.bufsize,
                    executable=current_arguments.executable,
                    stdin=current_arguments.stdin,
                    stdout=current_arguments.stdout,
                    stderr=current_arguments.stderr,
                    close_fds=current_arguments.close_fds,
                    shell=current_arguments.shell,
                    cwd=current_arguments.cwd,
                    env=current_arguments.env,
                    universal_newlines=current_arguments.universal_newlines,
                    startupinfo=current_arguments.startupinfo,
                    creationflags=current_arguments.creationflags,
                    restore_signals=current_arguments.restore_signals,
                    start_new_session=current_arguments.start_new_session,
                    pass_fds=current_arguments.pass_fds,
                    encoding=current_arguments.encoding,
                    errors=current_arguments.errors))
        #
        # Close stdout to allow processes to receive SIGPIPE.
        for upstream_process in processes[:last_command_index]:
            upstream_process.stdout.close()
        #
        # Communicate with the last process in the pipeline.
        # Mimick subprocess.run() behaviour as in
        # https://github.com/python/cpython/blob/3.6/Lib/subprocess.py#L424
        last_process = processes[last_command_index]
        try:
            stdout, stderr = last_process.communicate(
                input=self.call_arguments.input,
                timeout=self.call_arguments.timeout)
        except subprocess.TimeoutExpired as timeout_expired:
            last_process.kill()
            stdout, stderr = last_process.communicate()
            raise subprocess.TimeoutExpired(
                last_process.args,
                self.call_arguments.timeout,
                output=stdout,
                stderr=stderr) from timeout_expired
        #
    except BaseException:
        # The pipeline cannot complete anymore (a process could not be
        # started, the timeout expired, or execution was interrupted):
        # do not leave any already started process running.
        # The exception is always re-raised.
        for started_process in processes:
            started_process.kill()
            started_process.wait()
        #
        raise
    finally:
        # processes cleanup; avoid ResourceWarnings.
        # Closing an already closed stdout is harmless.
        for upstream_process in processes[:last_command_index]:
            upstream_process.stdout.close()
            upstream_process.wait()
        #
        self.current_state = self.states.finished
    #
    returncode = last_process.poll()
    if self.call_arguments.check and returncode:
        raise subprocess.CalledProcessError(returncode,
                                            last_process.args,
                                            output=stdout,
                                            stderr=stderr)
    #
    self.result = subprocess.CompletedProcess(last_process.args,
                                              returncode,
                                              stdout=stdout,
                                              stderr=stderr)