def test_min_if_exist(self):
    """
    Checks `min_if_exist` on pairs where none, one or both of the
    arguments are `None`, using both integers and floats.
    """
    # With no value at all, nothing can be returned
    assert min_if_exist(None, None) is None

    # (arguments, expected result)
    expectations = [
        ((1, None), 1),
        ((None, 0), 0),
        ((5, 3), 3),
        ((1.2, 8.12), 1.2),
        ((-0.2, -8.9), -8.9),
    ]
    for (first, second), expected in expectations:
        assert min_if_exist(first, second) == expected
def find_next_comment(text, start_index=0, end_index=None):
    """
    Looks for the next comment in `text`, searching from `start_index`
    up to `end_index` (the length of `text` is used when `end_index`
    is `None`).
    Both new-style comments ('//') and old-style comments (';') are
    looked for, and only unescaped comment symbols are considered.
    @returns: the index at which the earliest comment begins,
              or `None` if no comment was found.
    """
    search_end = len(text) if end_index is None else end_index

    new_style_index = \
        find_unescaped(text, COMMENT_SYM, start_index, search_end)
    old_style_index = \
        find_unescaped(text, OLD_COMMENT_SYM, start_index, search_end)

    # Whichever symbol comes first starts the comment
    return min_if_exist(new_style_index, old_style_index)
def _apply_strategy(self, **kwargs):
    """
    Tries to match a word starting at `self._start_index` in `self._text`.
    The word ends at the first whitespace, comment or unescaped special
    character found after its beginning (or at the end of the text).

    `kwargs` can contain two optional booleans:
    - `inside_choice`: `True` when the current word is inside a choice
      (defaults to `False`). When set, the characters listed in
      `RuleWord._should_be_escaped_in_choices_chars` also end the word.
    - `parsing_slot_def`: `True` when the current word is in a rule
      inside a slot definition (defaults to `False`). When set,
      the characters listed in
      `RuleWord._should_be_escaped_in_slot_def_chars` also end the word.

    @returns: `True` if a non-empty word was matched (a word token is
              then appended to `self._tokens` and `self._next_index`
              is moved past the word), `False` otherwise
              (`self.error_msg` is then set).
    """
    in_choice = kwargs.get("inside_choice", False)
    in_slot_def = kwargs.get("parsing_slot_def", False)

    text = self._text
    start = self._start_index

    # TODO this might be better using regexes
    if text[start].isspace():
        self.error_msg = \
            "Invalid token. Expected a word instead of a whitespace there."
        return False

    def cut_at_first(special_chars, current_end):
        # Moves the end of the word back to the earliest unescaped
        # occurrence of any of `special_chars` found from the word start.
        for special_char in special_chars:
            current_end = \
                min_if_exist(
                    current_end, find_unescaped(text, special_char, start)
                )
        return current_end

    # Walk forward until a whitespace or the end of the text is reached
    word_end = start + 1  # NOTE exclusive
    text_length = len(text)
    while word_end != text_length and not text[word_end].isspace():
        word_end += 1

    # A comment ends the word as well
    word_end = min_if_exist(word_end, find_next_comment(text, start))
    if word_end == start:
        self.error_msg = "Invalid token. Expected a word to start here."
        return False

    # Special characters that always end a word when unescaped;
    # no need to keep looking once the word is known to be empty
    for special_char in RuleWord._should_be_escaped_chars:
        if word_end == start:
            break
        word_end = \
            min_if_exist(
                word_end, find_unescaped(text, special_char, start)
            )

    # Context-specific special characters
    if in_choice and word_end > start:
        word_end = \
            cut_at_first(
                RuleWord._should_be_escaped_in_choices_chars, word_end
            )
    if in_slot_def and word_end > start:
        word_end = \
            cut_at_first(
                RuleWord._should_be_escaped_in_slot_def_chars, word_end
            )

    if word_end == start:
        self.error_msg = "Invalid token. Expected a word to start here."
        return False

    matched_word = text[start:word_end]
    self._next_index = word_end
    self._update_furthest_matched_index()
    self._tokens.append(LexicalToken(TerminalType.word, matched_word))
    return True
def _apply_strategy(self, **kwargs):
    """
    Tries to extract a key or a value from `self._text`, starting at
    `self._start_index`.

    `kwargs` can contain a value with key `extracting_key`.
    `extracting_key` is a boolean that is `True` if this rule should
    extract a key and `False` if this rule should extract a value.
    If `kwargs` doesn't contain `extracting_key`, defaults to `True`.

    The key/value can be enclosed in one of the symbols listed in
    `KEY_VAL_ENCLOSERS`, in which case it runs until the next unescaped
    occurrence of that same symbol. Otherwise, it runs until the next
    unescaped `ANNOTATION_END` or, whichever comes first,
    the next unescaped `KEY_VAL_CONNECTOR` (for a key)
    or `ANNOTATION_SEP` (for a value).

    @returns: `True` if a key/value was extracted (a token is then
              appended to `self._tokens`), `False` otherwise
              (`self.error_msg` is then set).
    """
    extracting_key = kwargs.get("extracting_key", True)
    if extracting_key:
        terminal_type = TerminalType.key
    else:
        terminal_type = TerminalType.value

    encloser = None
    for current_encloser in KEY_VAL_ENCLOSERS:
        if self._text.startswith(current_encloser, self._next_index):
            # Skip the whole opening encloser, whatever its length
            # (advancing by a hard-coded 1 was only right for
            # single-character enclosers).
            self._next_index += len(current_encloser)
            self._update_furthest_matched_index()
            encloser = current_encloser
            break

    if encloser is not None:
        # Enclosed key/value: runs until the matching closing encloser
        next_encloser_index = \
            find_unescaped(self._text, encloser, self._next_index)
        if next_encloser_index is None:
            self.error_msg = \
                "Missing key-value encloser. Expected symbol " + encloser + \
                " instead of end of line."
            return False

        # The extracted text excludes both the opening and closing enclosers
        extracted_text = \
            self._text[
                self._start_index + len(encloser):next_encloser_index
            ]
        # Continue right after the closing encloser
        self._next_index = next_encloser_index + len(encloser)
        self._update_furthest_matched_index()
        self._tokens.append(LexicalToken(terminal_type, extracted_text))
        return True

    # Key/value not enclosed
    end_annotation_index = \
        find_unescaped(self._text, ANNOTATION_END, self._next_index)
    if extracting_key:
        # A key stops at the key-value connector
        delimiter_index = \
            find_unescaped(
                self._text, KEY_VAL_CONNECTOR, self._next_index
            )
    else:
        # A value stops at the separator before the next key-value pair
        delimiter_index = \
            find_unescaped(
                self._text, ANNOTATION_SEP, self._next_index
            )
    end_key_value_index = \
        min_if_exist(delimiter_index, end_annotation_index)

    if end_key_value_index is None:
        self.error_msg = \
            "Couldn't find the end of key/value. " + \
            "Didn't expect the end of the line there."
        return False

    # Trailing whitespace is not part of the key/value and is not
    # consumed here: the index only advances by the stripped length.
    extracted_text = \
        self._text[self._start_index:end_key_value_index].rstrip()
    self._next_index += len(extracted_text)
    self._update_furthest_matched_index()
    self._tokens.append(LexicalToken(terminal_type, extracted_text))
    return True