Beispiel #1
0
 def test_min_if_exist(self):
     """
     Check `min_if_exist` with missing (`None`) and present arguments.
     """
     # Both arguments missing: the result must be the `None` singleton.
     assert min_if_exist(None, None) is None

     # (arguments, expected result) pairs, compared by equality.
     expectations = (
         ((1, None), 1),
         ((None, 0), 0),
         ((5, 3), 3),
         ((1.2, 8.12), 1.2),
         ((-0.2, -8.9), -8.9),
     )
     for (first, second), expected in expectations:
         assert min_if_exist(first, second) == expected
Beispiel #2
0
def find_next_comment(text, start_index=0, end_index=None):
    """
    Looks for the next comment in `text`, searching from `start_index`
    up to `end_index` (the end of `text` when `end_index` is `None`).
    Both the new-style comment symbol (`COMMENT_SYM`) and the old-style
    one (`OLD_COMMENT_SYM`) are searched for with `find_unescaped`.
    @returns: the result of `min_if_exist` applied to the two indices
              found, i.e. the index where the first comment begins, or
              `None` if no comment was found.
    """
    search_end = len(text) if end_index is None else end_index

    # Search for each kind of comment symbol over the same range.
    new_style_index, old_style_index = (
        find_unescaped(text, symbol, start_index, search_end)
        for symbol in (COMMENT_SYM, OLD_COMMENT_SYM)
    )
    return min_if_exist(new_style_index, old_style_index)
Beispiel #3
0
    def _apply_strategy(self, **kwargs):
        """
        Tries to match a word starting at `self._start_index` in `self._text`.

        `kwargs` can contain two optional booleans:
        - `inside_choice`: `True` when the current word is inside a choice,
          `False` otherwise (defaults to `False`);
        - `parsing_slot_def`: `True` iff the current word is in a rule inside
          a slot definition (defaults to `False`).

        The word ends at the first whitespace, comment or unescaped special
        character found after `self._start_index` (or at the end of the text).

        @returns: `True` if a non-empty word was found (a word token is then
                  appended to `self._tokens` and `self._next_index` is
                  updated), `False` otherwise (`self.error_msg` is then set).
        """
        in_choice = kwargs.get("inside_choice", False)
        in_slot_def = kwargs.get("parsing_slot_def", False)

        text = self._text
        start = self._start_index

        # TODO this might be better using regexes
        if text[start].isspace():
            self.error_msg = \
                "Invalid token. Expected a word instead of a whitespace there."
            return False

        # Advance to the first whitespace after the word (exclusive bound),
        # or to the end of the text if there is none.
        word_end = start + 1
        text_length = len(text)
        while word_end < text_length and not text[word_end].isspace():
            word_end += 1

        # A comment also terminates the word.
        word_end = min_if_exist(word_end, find_next_comment(text, start))
        if word_end == start:
            self.error_msg = "Invalid token. Expected a word to start here."
            return False

        # Cut the word at the first unescaped special character; there is no
        # point in continuing once the word has become empty.
        for special_char in RuleWord._should_be_escaped_chars:
            if word_end == start:
                break
            word_end = \
                min_if_exist(
                    word_end, find_unescaped(text, special_char, start)
                )

        def truncate_at(special_chars, current_end):
            # Earliest index among `current_end` and the first unescaped
            # occurrence of each character of `special_chars`.
            for special in special_chars:
                current_end = \
                    min_if_exist(
                        current_end, find_unescaped(text, special, start)
                    )
            return current_end

        if in_choice and word_end > start:
            word_end = \
                truncate_at(
                    RuleWord._should_be_escaped_in_choices_chars, word_end
                )

        if in_slot_def and word_end > start:
            word_end = \
                truncate_at(
                    RuleWord._should_be_escaped_in_slot_def_chars, word_end
                )

        if word_end == start:
            self.error_msg = "Invalid token. Expected a word to start here."
            return False

        matched_word = text[start:word_end]
        self._next_index = word_end
        self._update_furthest_matched_index()
        self._tokens.append(LexicalToken(TerminalType.word, matched_word))
        return True
Beispiel #4
0
    def _apply_strategy(self, **kwargs):
        """
        Tries to extract a key or a value (of a key/value pair) from
        `self._text`.

        `kwargs` can contain a value with key `extracting_key`.
        `extracting_key` is a boolean that is `True` if this rule should
        extract a key and `False` if this rule should extract a value.
        If `kwargs` doesn't contain `extracting_key`, defaults to `True`.

        The key/value can either be enclosed between two occurrences of one
        of the symbols in `KEY_VAL_ENCLOSERS`, or not be enclosed, in which
        case it ends at the next unescaped connector (for a key), separator
        (for a value) or end of annotation, whichever comes first.

        @returns: `True` if a key/value was extracted (a token is then
                  appended to `self._tokens` and `self._next_index` is
                  updated), `False` otherwise (`self.error_msg` is then set).
        """
        extracting_key = kwargs.get("extracting_key", True)
        if extracting_key:
            terminal_type = TerminalType.key
        else:
            terminal_type = TerminalType.value

        encloser = None
        for current_encloser in KEY_VAL_ENCLOSERS:
            if self._text.startswith(current_encloser, self._next_index):
                # Skip the whole opening encloser, whatever its length.
                self._next_index += len(current_encloser)
                self._update_furthest_matched_index()
                encloser = current_encloser
                break

        if encloser is not None:
            # Enclosed key/value
            encloser_length = len(encloser)
            next_encloser_index = \
                find_unescaped(self._text, encloser, self._next_index)
            if next_encloser_index is None:
                self.error_msg = \
                    "Missing key-value encloser. Expected symbol " + encloser + \
                    " instead of end of line."
                return False

            # The extracted text lies strictly between the two enclosers.
            content_start_index = self._start_index + encloser_length
            extracted_text = \
                self._text[content_start_index:next_encloser_index]
            # Resume lexing right after the closing encloser.
            self._next_index = next_encloser_index + encloser_length
            self._update_furthest_matched_index()
            self._tokens.append(LexicalToken(terminal_type, extracted_text))
            return True

        # Key/value not enclosed
        end_annotation_index = \
            find_unescaped(self._text, ANNOTATION_END, self._next_index)
        # A key is terminated by the key/value connector, a value by the
        # separator that introduces the next key/value pair.
        if extracting_key:
            terminator = KEY_VAL_CONNECTOR
        else:
            terminator = ANNOTATION_SEP
        terminator_index = \
            find_unescaped(self._text, terminator, self._next_index)
        end_key_value_index = \
            min_if_exist(terminator_index, end_annotation_index)

        if end_key_value_index is None:
            self.error_msg = \
                "Couldn't find the end of key/value. " + \
                "Didn't expect the end of the line there."
            return False

        # NOTE(review): the slice starts at `_start_index` while the searches
        # above start at `_next_index`; presumably both are equal when no
        # encloser was consumed -- confirm against the base class.
        # Trailing whitespace is not part of the key/value and is left
        # unconsumed (`_next_index` only advances past the stripped text).
        extracted_text = \
            self._text[self._start_index:end_key_value_index].rstrip()
        self._next_index += len(extracted_text)
        self._update_furthest_matched_index()
        self._tokens.append(LexicalToken(terminal_type, extracted_text))

        return True