Example #1
    def test_split(self):
        text = 'This is a test'
        tokenz = self._tokz.tokenize(text)

        self.assertEqual(tokenz[0], Token(text, Span(0, 4)))
        self.assertEqual(tokenz[1], Token(text, Span(5, 7)))
        self.assertEqual(tokenz[2], Token(text, Span(8, 9)))
        self.assertEqual(tokenz[3], Token(text, Span(10, 14)))
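Token and Span themselves do not appear in these examples. A minimal sketch that would satisfy the value-equality assertions in the tokenizer tests (the field names and the value property are assumptions, not the project's actual classes; the Span used by the YAML examples further down carries line/column positions rather than plain indices):

from dataclasses import dataclass


@dataclass(frozen=True)
class Span:
    """Half-open [start, end) character range into the tokenized text."""
    start: int
    end: int


@dataclass(frozen=True)
class Token:
    """Keeps the original text plus the span it covers; dataclass equality
    is by value, which is what the assertEqual calls rely on."""
    text: str
    span: Span

    @property
    def value(self):
        return self.text[self.span.start:self.span.end]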
Example #2
    def test_specials(self):
        text = 'This Dr. is a test!'
        #       0123456789012345678
        tokenz = self._tokz.tokenize(text)

        self.assertEqual(tokenz, [Token(text, Span(0, 4)),
                                  Token(text, Span(5, 8)),
                                  Token(text, Span(9, 11)),
                                  Token(text, Span(12, 13)),
                                  Token(text, Span(14, 19))])
Example #3
 def tokenize(self, text):
     """
     Tokenize the string using white space.
     :param text: The string of text to tokenize
     :return: The list of tokens separated by white space.
     """
     return [Token(text, Span(s, e)) for s, e in _white_space_spans(text)]
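_white_space_spans is referenced by the tokenize examples but never shown. A minimal sketch consistent with the pure whitespace-splitting tests (test_split, test_singleword, test_whitespace) could look as follows; it deliberately ignores the punctuation splitting exercised by test_prefix, test_postfix and test_infix, as well as the empty-string case, so treat it as an assumption rather than the project's helper:

import re


def _white_space_spans(text):
    """Yield (start, end) index pairs for each maximal run of
    non-whitespace characters in the text."""
    for match in re.finditer(r'\S+', text):
        yield match.start(), match.end()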
Example #4
 def _create_record_node(self):
     """Creates a record node."""
     node = MappingDataNode()
     start_mark = self._event.start_mark
     end_mark = self._event.end_mark
     self._next_parse_event()
     # create children
     while (self._event is not None and
            not isinstance(self._event, pyyaml.MappingEndEvent)):
         key = self._create_record_key()
         self._next_parse_event()  # value event
         if not key:  # if key is invalid
             continue
         if self._event is None:
             break  # something went wrong, abandon ship!
         if key.value == '<<':  # handle merge
             self._perform_merge(key, node)
             self._next_parse_event()
             continue
         child_node = self._create_node(node)
         self._next_parse_event()
         if child_node is None:  # i.e. unresolved alias
             continue
         child_node.key = key
         node.set_child(child_node)
     if self._event is not None:  # update end_mark when map ends correctly
         end_mark = self._event.end_mark
     elif node.children:
         end_mark = node.children[-1].span.end
         end_mark.line -= 1
         end_mark.column -= 1
     node.span = Span.from_marks(start_mark, end_mark)
     return node
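The record and array builders drive a _next_parse_event helper that is not included in these examples. A sketch of what it might do, assuming the loader keeps a PyYAML event iterator in a hypothetical _event_generator attribute and using the _iterate_events flag also seen in Example #7:

def _next_parse_event(self):
    """Advance self._event to the next parse event, or set it to None when
    the stream is exhausted or iteration has been switched off."""
    if not self._iterate_events:
        self._event = None
        return
    try:
        self._event = next(self._event_generator)  # hypothetical attribute
    except StopIteration:
        self._event = None
        self._iterate_events = False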
Example #5
    def test_postfix(self):
        text = 'Hello) my 555! dearest world?'
        #       01234567890123456789012345678
        res = self._tokz.tokenize(text)

        self.assertEqual(res, [Token(text, Span(0, 5)),
                               Token(text, Span(5, 6)),
                               Token(text, Span(7, 9)),
                               Token(text, Span(10, 13)),
                               Token(text, Span(13, 14)),
                               Token(text, Span(15, 22)),
                               Token(text, Span(23, 28)),
                               Token(text, Span(28, 29))])
Example #6
 def test_prefix(self):
     text = '(Hello my $555 dearest world'
     #       0123456789012345678901234567
     res = self._tokz.tokenize(text)
     self.assertEqual(res, [Token(text, Span(0, 1)),
                            Token(text, Span(1, 6)),
                            Token(text, Span(7, 9)),
                            Token(text, Span(10, 11)),
                            Token(text, Span(11, 14)),
                            Token(text, Span(15, 22)),
                            Token(text, Span(23, 28))])
Example #7
 def _create_record_key(self):
     """Creates `TextValue` of record key."""
     # check if key is scalar
     if not isinstance(self._event, pyyaml.ScalarEvent):
         start_pos = Position.from_mark(self._event.start_mark)
         self._create_fatal_error_node(start_pos)
         notification = Notification.from_name('ComplexRecordKey')
         notification.span = self._fatal_error_node.span
         self.notification_handler.report(notification)
         self._event = None
         self._iterate_events = False
         return None
     key = TextValue()
     key.value = self._event.value
     key.span = Span.from_event(self._event)
     return key
Example #8
    def test_whitespace(self):
        text = 'Hello there 5555 my dearest world'
        #       012345678901234567890123456789012
        res = self._tokz.tokenize(text)

        self.assertEqual(res, [Token(text, Span(0, 5)),
                               Token(text, Span(6, 11)),
                               Token(text, Span(12, 16)),
                               Token(text, Span(17, 19)),
                               Token(text, Span(20, 27)),
                               Token(text, Span(28, 33))])
Example #9
 def _create_scalar_node(self):
     """Creates a ScalarDataNode."""
     node = ScalarDataNode()
     tag = self._event.tag
     if tag is None or not tag.startswith('tag:yaml.org,2002:'):
         tag = resolve_scalar_tag(self._event.value)
     node.span = Span.from_event(self._event)
     try:
         node.value = construct_scalar(self._event.value, tag)
     except Exception as error:
         notification = Notification.from_name('ConstructScalarError', error.args[0])
         notification.span = node.span
         self.notification_handler.report(notification)
         return node
     if node.value is None:
         # alter position of empty node (so it can be selected)
         node.span.end.column += 1
     return node
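resolve_scalar_tag and construct_scalar are likewise external to these snippets. Assuming pyyaml here is PyYAML imported under that name, the implicit tag of a plain scalar could be resolved as sketched below; this is an illustration, not the project's actual implementation:

import yaml as pyyaml


def resolve_scalar_tag(value):
    """Return the implicit YAML tag (e.g. 'tag:yaml.org,2002:int') that a
    plain, unquoted scalar with this text would receive."""
    return pyyaml.resolver.Resolver().resolve(pyyaml.ScalarNode, value,
                                              (True, False))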
Example #10
 def _create_array_node(self):
     """Creates an array node."""
     node = SequenceDataNode()
     start_mark = self._event.start_mark
     end_mark = self._event.end_mark
     self._next_parse_event()
     while (self._event is not None and
            not isinstance(self._event, pyyaml.SequenceEndEvent)):
         key = TextValue(str(len(node.children)))
         child_node = self._create_node(node)
         self._next_parse_event()
         if child_node is None:  # i.e. unresolved alias
             continue
         child_node.key = key
         node.children.append(child_node)
     if self._event is not None:  # update end_mark when array ends correctly
         end_mark = self._event.end_mark
     elif node.children:
         end_mark = node.children[-1].span.end
         end_mark.line -= 1
         end_mark.column -= 1
     node.span = Span.from_marks(start_mark, end_mark)
     return node
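Both container builders delegate child construction to _create_node. A sketch of the dispatch the examples imply (alias and anchor handling is omitted, and the structure is an assumption):

def _create_node(self, parent):
    """Pick a builder based on the type of the current parse event.
    The parent argument mirrors the call sites above but is unused here."""
    if isinstance(self._event, pyyaml.MappingStartEvent):
        return self._create_record_node()
    if isinstance(self._event, pyyaml.SequenceStartEvent):
        return self._create_array_node()
    if isinstance(self._event, pyyaml.ScalarEvent):
        return self._create_scalar_node()
    return None  # e.g. an alias that could not be resolved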
Example #11
 def tokenize(self, text):
     return [Token(text, Span(s, e)) for s, e in _white_space_spans(text)]
Example #12
 def test_infix(self):
     text = 'Hel?lo my 55!5! dear,est (world?'
     #       01234567890123456789012345678901
     res = self._tokz.tokenize(text)
     self.assertEqual(res, [Token(text, Span(0, 3)),
                            Token(text, Span(3, 4)),
                            Token(text, Span(4, 6)),
                            Token(text, Span(7, 9)),
                            Token(text, Span(10, 12)),
                            Token(text, Span(12, 13)),
                            Token(text, Span(13, 14)),
                            Token(text, Span(14, 15)),
                            Token(text, Span(16, 20)),
                            Token(text, Span(20, 21)),
                            Token(text, Span(21, 24)),
                            Token(text, Span(25, 26)),
                            Token(text, Span(26, 31)),
                            Token(text, Span(31, 32))
                            ])
Example #13
 def test_empty(self):
     text = ''
     tokenz = self._tokz.tokenize(text)
     self.assertEqual(tokenz[0], Token(text, Span(0, 1)))
Example #14
 def test_singleword(self):
     text = 'This'
     tokenz = self._tokz.tokenize(text)
     self.assertEqual(tokenz[0], Token(text, Span(0, 4)))
Example #15
    def tokenize(self, text):
        return [Token(text, Span(s, e)) for s, e in _white_space_spans(text)]
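Putting the tokenizer sketches together, a possible usage (the class name WhitespaceTokenizer is hypothetical):

class WhitespaceTokenizer:
    def tokenize(self, text):
        """Split on whitespace only, as in Examples #3, #11 and #15."""
        return [Token(text, Span(s, e)) for s, e in _white_space_spans(text)]


tokens = WhitespaceTokenizer().tokenize('This is a test')
print([(t.span.start, t.span.end) for t in tokens])
# [(0, 4), (5, 7), (8, 9), (10, 14)] -- matches test_split above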