def next(self):
    """Return the next ``(param_name, value)`` pair described by the format.

    The next parameter marker is located in the format string, and the
    matching value is read from the param stream when the computed offset
    still lies inside it. If no (truthy) value was read, the parameter's
    default value is returned instead.

    :return: tuple of ``(param_name, value_or_default)``.
    :raises StopIteration: when the format/default helpers raise
        ``ValueError`` (treated as "nothing left to iterate").
    :raises content.ParseException: when neither a value nor a default
        value is available for the parameter.
    """
    try:
        p_start, param_format, p_end = self._get_next_param_format()
        param_name, default_value = self._get_param_name_default_value(
            param_format)
    except ValueError:
        # If we get back a ValueError then time to stop the iteration.
        raise StopIteration()

    # Translate the marker position in the format into an offset in the
    # param stream, based on how far both pointers have already moved.
    v_start = p_start - self._alias_fmt_ptr + self._param_strm_ptr
    value = None

    # Only read a value when v_start is within param_stream.
    if v_start < len(self._param_stream):
        _, value, v_end = self._get_next_value(v_start)
        self._param_strm_ptr = v_end

    # Always move the format pointer past the marker that was just handled.
    # Leaving it in place when the param stream is exhausted would make every
    # later call find the same marker again, so a parameter with a default
    # value would be returned forever and iteration would never terminate.
    self._alias_fmt_ptr = p_end

    if not value and not default_value:
        raise content.ParseException(
            'No value supplied and no default value found.')
    return param_name, value if value else default_value
def next(self):
    """Return the next ``(param_name, value)`` pair described by the format.

    The next parameter marker is located in the format string, and the
    matching value is read from the param stream when the computed offset
    still lies inside it. If no (truthy) value was read, the parameter's
    default value is returned instead.

    :return: tuple of ``(param_name, value_or_default)``.
    :raises StopIteration: when the format/default helpers raise
        ``ValueError`` (treated as "nothing left to iterate").
    :raises content.ParseException: when neither a value nor a default
        value is available for the parameter.
    """
    try:
        p_start, param_format, p_end = self._get_next_param_format()
        param_name, default_value = self._get_param_name_default_value(param_format)
    except ValueError:
        # If we get back a ValueError then time to stop the iteration.
        raise StopIteration()

    # Translate the marker position in the format into an offset in the
    # param stream, based on how far both pointers have already moved.
    v_start = p_start - self._alias_fmt_ptr + self._param_strm_ptr
    value = None

    # Only read a value when v_start is within param_stream.
    if v_start < len(self._param_stream):
        _, value, v_end = self._get_next_value(v_start)
        self._param_strm_ptr = v_end

    # Advance in the format string unconditionally. Default values are
    # supported and param_stream can be empty or exhausted, so the format
    # pointer must move even when no value was read. The earlier guard
    # (v_start < len(self._format)) compared a stream offset against the
    # format length and left the pointer stuck whenever the stream was
    # longer than the format, repeating the same parameter forever.
    self._alias_fmt_ptr = p_end

    if not value and not default_value:
        raise content.ParseException('No value supplied and no default value found.')
    return param_name, value if value else default_value
def _get_next_param_format(self):
    """Locate the next parameter marker in the format string.

    The search starts at ``self._alias_fmt_ptr``.

    :return: tuple of ``(marker_start, param_format, marker_end)`` where
        ``param_format`` is the stripped text between the start and end
        markers and ``marker_end`` is the index just past the end marker.
    :raises ValueError: when no start marker is found (propagated from
        ``index``).
    :raises content.ParseException: when a start marker has no end marker.
    """
    fmt = self._format
    opener = self.FORMAT_MARKER_START
    closer = self.FORMAT_MARKER_END

    # A missing start marker intentionally surfaces as ValueError.
    open_at = fmt.index(opener, self._alias_fmt_ptr)

    close_at = fmt.find(closer, open_at)
    if close_at == -1:
        # A start marker was found but end is not therefore this is a
        # Parser exception.
        raise content.ParseException('Expected end marker.')

    inner = fmt[open_at + len(opener):close_at].strip()
    return open_at, inner, close_at + len(closer)
def parse(start, stream):
    """Extract the value beginning at ``start`` up to the next end marker.

    Unlike the other parsers, a missing end marker is not an error: the
    value then runs to the end of the stream.

    :param start: index in ``stream`` where the value begins.
    :param stream: the text being parsed.
    :return: tuple of ``(start, value, end)`` where ``end`` is the index of
        the end marker, or ``len(stream)`` when no marker was found.
    """
    stop = stream.find(DefaultParser.end, start)
    # if not found pick until end of stream.
    stop = len(stream) if stop == -1 else stop
    try:
        token = stream[start:stop]
    except IndexError:
        raise content.ParseException('What sort of messed up stream did you provide!')
    return start, token, stop
def parse(start, stream):
    """Extract a delimited string value that opens at ``start``.

    The character at ``start`` is treated as the opening delimiter and is
    skipped. Scanning stops at the first ``StringValueParser.end`` character
    that is not immediately preceded by ``StringValueParser.escape``.

    :param start: index of the opening delimiter in ``stream``.
    :param stream: the text being parsed.
    :return: tuple of ``(start, value, next_index)`` where ``value`` excludes
        both delimiters and ``next_index`` is one past the closing delimiter.
    :raises content.ParseException: when no unescaped closing delimiter
        exists after ``start``, including when the opening delimiter is the
        last character of the stream.
    """
    # Bounding the loop by the stream length means a stream that ends right
    # after the opening delimiter is reported as a parse error instead of
    # failing with an IndexError on the first character read.
    stream_len = len(stream)
    char_idx = start + 1
    while char_idx < stream_len:
        if (stream[char_idx] == StringValueParser.end and
                stream[char_idx - 1] != StringValueParser.escape):
            # skip the start and end chars
            return start, stream[start + 1:char_idx], char_idx + 1
        char_idx += 1
    raise content.ParseException('What sort of messed up stream did you provide!')
def parse(start, stream):
    """Extract a brace-delimited value by matching nesting depth.

    Starting at ``start``, every ``JsonValueParser.start`` character raises
    the depth and every ``JsonValueParser.end`` character lowers it. The
    scan stops at the first position where the depth is zero.

    :param start: index in ``stream`` where scanning begins.
    :param stream: the text being parsed.
    :return: tuple of ``(start, value, next_index)`` where ``value`` keeps
        its opening and closing characters and ``next_index`` is one past
        the last character of the value.
    :raises content.ParseException: when the end of the stream is reached
        before the depth returns to zero (also for an empty stream or a
        ``start`` at or beyond the end of the stream).
    """
    # Returning from inside the loop avoids using 0 as a "not found yet"
    # sentinel: index 0 is a legitimate stop position, and treating it as
    # "keep looking" made the scan spin forever without advancing.
    message_depth = 0
    for char_idx in range(start, len(stream)):
        char = stream[char_idx]
        if char == JsonValueParser.start:
            message_depth += 1
        elif char == JsonValueParser.end:
            message_depth -= 1
        if not message_depth:
            # preserve the start and end chars
            return start, stream[start:char_idx + 1], char_idx + 1
    raise content.ParseException('What sort of messed up stream did you provide!')
def get_extracted_param_value(self):
    """Extract parameter values from the param stream using the format.

    Three sources are combined, from lowest to highest priority:

    1. Default values declared in the format string.
    2. Values matched positionally against the format string.
    3. Extra ``key=value`` pairs found at the end of the param stream.

    Side effect: ``self._param_stream`` is rewritten with the trailing
    ``key=value`` pairs removed and a single space added on each side.

    :return: dict mapping parameter names to extracted values.
    :raises content.ParseException: when the format is non-empty, the
        remaining param stream is blank and no value was extracted.
    """
    result = {}

    # As there's a lot of questions about using regular expressions,
    # I'll try to be thorough when documenting this code.

    # We're parsing the arbitrary key-value pairs at the end of the stream
    # to support passing of parameters not specified in the format string,
    # and cutting them from the stream as they're no longer needed.
    # Possible values are quoted strings, a word, or anything inside "{}".
    pairs_match = r'(?:^|\s+)(\S+)=("(.*?)"|\'(.*?)\'|({.*?})|(\S+))'
    extra = re.match(r'.*?((' + pairs_match + r'\s*)*)$',
                     self._param_stream, re.DOTALL)
    if extra:
        kv_pairs = re.findall(pairs_match, extra.group(1), re.DOTALL)
        # Cut exactly the matched trailing span. A plain str.replace() would
        # also delete identical text appearing earlier in the stream and
        # corrupt the positional values.
        cut_from, cut_to = extra.span(1)
        self._param_stream = (self._param_stream[:cut_from] +
                              self._param_stream[cut_to:])
    self._param_stream = " %s " % self._param_stream

    # Now we'll match parameters with default values in form of
    # {{ value = parameter }} (and all possible permutations of spaces),
    # compiling them into a list.
    # "test {{ url = http://google.com }} {{ extra = Test }}" will become
    # [ ["url", "http://google.com"], ["extra", "Test"] ]
    params = re.findall(r'{{\s*(.+?)\s*(?:=\s*[\'"]?({.+?}|.+?)[\'"]?)?\s*}}',
                        self._format, re.DOTALL)

    # Now we're transforming our format string into a regular expression,
    # substituting {{ ... }} with regex named groups, so that param_stream
    # matched against this expression yields a dict of params with values.
    param_match = r'["\']?(?P<\2>(?:(?<=\').+?(?=\')|(?<=").+?(?=")|{.+?}|.+?))["\']?'
    reg = re.sub(r'(\s*){{\s*([^=}]+?)\s*}}(?![\'"]?\s+}})',
                 r'\1' + param_match,
                 self._format)
    reg = re.sub(r'(\s*){{\s*(\S+)\s*=\s*(?:{.+?}|.+?)\s*}}',
                 r'(?:\1' + param_match + r')?',
                 reg)
    reg = re.sub(r'(\s*){{\s*(.+?)\s*}}',
                 r'\1' + param_match,
                 reg)
    # Raw literal: "\s" is not a valid escape in a normal string literal.
    reg = r'^\s*' + reg + r'\s*$'

    # Now we're matching param_stream against our format string regex,
    # getting a dict of values. We'll also get default values from
    # "params" list if something is not present.
    # Priority, from lowest to highest:
    # 1. Default parameters
    # 2. Matched parameters
    # 3. Extra parameters
    matched_stream = re.match(reg, self._param_stream, re.DOTALL)
    if matched_stream:
        values = matched_stream.groupdict()
    for param in params:
        matched_value = values[param[0]] if matched_stream else None
        result[param[0]] = matched_value or param[1]
    if extra:
        for pair in kv_pairs:
            result[pair[0]] = ''.join(pair[2:])

    if self._format and not (self._param_stream.strip() or any(result.values())):
        raise content.ParseException('No value supplied and no default value found.')

    return result
def get_extracted_param_value(self):
    """Extract parameter values from the param stream using the format.

    The work is done in three steps whose results are merged:

    1. Trailing ``key=value`` pairs are collected from the end of the param
       stream and cut from it.
    2. Optional parameters (those with a default value) are collected from
       the format string.
    3. The format string is turned into a regular expression with one named
       group per parameter and matched against the remaining param stream.

    Matched values override defaults, and trailing ``key=value`` pairs
    override both.

    Side effect: ``self._param_stream`` is rewritten with the trailing
    ``key=value`` pairs removed and a single space added on each side.

    :return: dict mapping parameter names to extracted values.
    :raises content.ParseException: when the format is non-empty, the
        remaining param stream is blank and no value was extracted.
    """
    result = {}

    # As there's a lot of questions about using regular expressions,
    # I'll try to be thorough when documenting this code.

    # I'll split the whole convoluted regex into snippets to make it
    # a bit more readable (hopefully).
    snippets = dict()

    # Formats for keys and values: key is a non-spaced string,
    # value is anything in quotes or curly braces, or a single word.
    snippets['key'] = r'\s*(\S+?)\s*'
    snippets['value'] = r'""|\'\'|"(.+?)"|\'(.+?)\'|({.+?})|(\S+)'

    # Extended value: also matches unquoted text (caution).
    snippets['ext_value'] = r'""|\'\'|"(.+?)"|\'(.+?)\'|({.+?})|(.+?)'

    # Key-value pair:
    snippets['pairs'] = r'(?:^|\s+){key}=({value})'.format(**snippets)

    # End of string: multiple space-separated key-value pairs:
    snippets['ending'] = r'.*?(({pairs}\s*)*)$'.format(**snippets)

    # Default value in optional parameters:
    snippets['default'] = r'\s*=\s*(?:{ext_value})\s*'.format(**snippets)

    # Optional parameter (has a default value):
    snippets['optional'] = '{{' + snippets['key'] + snippets['default'] + '}}'

    # Required parameter (no default value):
    snippets['required'] = '{{' + snippets['key'] + '}}'

    # 1. Matching the arbitrary key-value pairs at the end of the command
    # to support extra parameters (not specified in the format string),
    # and cutting them from the command string afterwards.
    ending_pairs = re.match(snippets['ending'], self._param_stream, re.DOTALL)
    if ending_pairs:
        kv_pairs = re.findall(snippets['pairs'],
                              ending_pairs.group(1), re.DOTALL)
        # Cut exactly the matched trailing span. A plain str.replace() would
        # also delete identical text appearing earlier in the command and
        # corrupt the positional values.
        cut_from, cut_to = ending_pairs.span(1)
        self._param_stream = (self._param_stream[:cut_from] +
                              self._param_stream[cut_to:])
    self._param_stream = " %s " % self._param_stream

    # 2. Matching optional parameters (with default values).
    optional = re.findall(snippets['optional'], self._format, re.DOTALL)

    # Transforming our format string into a regular expression,
    # substituting {{ ... }} with regex named groups, so that param_stream
    # matched against this expression yields a dict of params with values.
    param_match = r'\1["\']?(?P<\2>(?:(?<=\').+?(?=\')|(?<=").+?(?=")|{.+?}|.+?))["\']?'
    reg = re.sub(r'(\s*)' + snippets['optional'],
                 r'(?:' + param_match + r')?',
                 self._format)
    reg = re.sub(r'(\s*)' + snippets['required'], param_match, reg)
    # Raw literal: "\s" is not a valid escape in a normal string literal.
    reg = r'^\s*' + reg + r'\s*$'

    # 3. Matching the command against our regex to get the param values
    matched_stream = re.match(reg, self._param_stream, re.DOTALL)

    # Compiling results from the steps 1-3.
    if matched_stream:
        result = matched_stream.groupdict()
    for param in optional:
        matched_value = result[param[0]] if matched_stream else None
        # Fall back to the default captured from the format string.
        matched_result = matched_value or ''.join(param[1:])
        if matched_result is not None:
            result[param[0]] = matched_result
    if ending_pairs:
        for pair in kv_pairs:
            result[pair[0]] = ''.join(pair[2:])

    if self._format and not (self._param_stream.strip() or any(result.values())):
        raise content.ParseException(
            'No value supplied and no default value found.')

    return result