def test_split(self):
    """Each whitespace-separated word becomes one token with word offsets."""
    sample = 'This is a test'
    result = self._tokz.tokenize(sample)
    expected_offsets = [(0, 4), (5, 7), (8, 9), (10, 14)]
    for idx, (begin, end) in enumerate(expected_offsets):
        self.assertEqual(result[idx], Token(sample, Span(begin, end)))
def test_specials(self):
    """The abbreviation 'Dr.' and trailing '!' are tokenized with correct offsets."""
    sample = 'This Dr. is a test!'
    #         0123456789012345678
    offsets = [(0, 4), (5, 8), (9, 11), (12, 13), (14, 19)]
    expected = [Token(sample, Span(begin, end)) for begin, end in offsets]
    self.assertEqual(self._tokz.tokenize(sample), expected)
def tokenize(self, text):
    """
    Tokenize the string using white space.

    :param text: The string of text to tokenize
    :return: The list of tokens separated by white space.
    """
    tokens = []
    for begin, end in _white_space_spans(text):
        tokens.append(Token(text, Span(begin, end)))
    return tokens
def _create_record_node(self):
    """Creates a record node.

    Consumes parse events from ``self._event`` until the matching
    ``MappingEndEvent``, creating a key/value child for each entry and
    attaching it to a new :class:`MappingDataNode`.  The node's span is
    computed from the mapping's start/end marks (or from the last child's
    span when the stream ends before the mapping is closed).
    """
    node = MappingDataNode()
    # Remember the mapping's opening position; end_mark is refined below.
    start_mark = self._event.start_mark
    end_mark = self._event.end_mark
    self._next_parse_event()
    # create children
    while (self._event is not None and
           not isinstance(self._event, pyyaml.MappingEndEvent)):
        key = self._create_record_key()
        self._next_parse_event()  # value event
        if not key:  # if key is invalid
            continue
        if self._event is None:
            break  # something went wrong, abandon ship!
        if key.value == '<<':  # handle merge
            # YAML merge key: fold the referenced mapping into this node.
            self._perform_merge(key, node)
            self._next_parse_event()
            continue
        child_node = self._create_node(node)
        self._next_parse_event()
        if child_node is None:  # i.e. unresolved alias
            continue
        child_node.key = key
        node.set_child(child_node)
    if self._event is not None:
        # update end_mark when map ends correctly
        end_mark = self._event.end_mark
    elif node.children:
        # Stream ended prematurely: fall back to the last child's end
        # position.  The -1 adjustments convert back toward mark
        # coordinates — presumably spans are 1-based while marks are
        # 0-based; TODO confirm against Span.from_marks/Position.
        end_mark = node.children[-1].span.end
        end_mark.line -= 1
        end_mark.column -= 1
    node.span = Span.from_marks(start_mark, end_mark)
    return node
def test_postfix(self):
    """Trailing punctuation is split off into its own single-char token."""
    sample = 'Hello) my 555! dearest world?'
    #         01234567890123456789012345678
    offsets = [(0, 5), (5, 6), (7, 9), (10, 13), (13, 14),
               (15, 22), (23, 28), (28, 29)]
    expected = [Token(sample, Span(begin, end)) for begin, end in offsets]
    self.assertEqual(self._tokz.tokenize(sample), expected)
def test_prefix(self):
    """Leading punctuation ('(' and '$') is split into its own token."""
    sample = '(Hello my $555 dearest world'
    #         0123456789012345678901234567
    offsets = [(0, 1), (1, 6), (7, 9), (10, 11), (11, 14),
               (15, 22), (23, 28)]
    expected = [Token(sample, Span(begin, end)) for begin, end in offsets]
    self.assertEqual(self._tokz.tokenize(sample), expected)
def _create_record_key(self): """Creates `TextValue` of record key.""" # check if key is scalar if not isinstance(self._event, pyyaml.ScalarEvent): start_pos = Position.from_mark(self._event.start_mark) self._create_fatal_error_node(start_pos) notification = Notification.from_name('ComplexRecordKey') notification.span = self._fatal_error_node.span self.notification_handler.report(notification) self._event = None self._iterate_events = False return None key = TextValue() key.value = self._event.value key.span = Span.from_event(self._event) return key
def test_whitspace(self):  # NOTE(review): name likely meant "test_whitespace"
    """Plain words and digits separated only by spaces tokenize cleanly."""
    sample = 'Hello there 5555 my dearest world'
    #         012345678901234567890123456789012
    offsets = [(0, 5), (6, 11), (12, 16), (17, 19), (20, 27), (28, 33)]
    expected = [Token(sample, Span(begin, end)) for begin, end in offsets]
    self.assertEqual(self._tokz.tokenize(sample), expected)
def _create_scalar_node(self):
    """Creates a ScalarDataNode.

    The scalar's YAML tag is taken from the event; when missing or not a
    standard ``tag:yaml.org,2002:`` tag, it is inferred from the value via
    ``resolve_scalar_tag``.  Construction failures are reported through the
    notification handler instead of raising.
    """
    node = ScalarDataNode()
    tag = self._event.tag
    if tag is None or not tag.startswith('tag:yaml.org,2002:'):
        # No explicit (standard) tag on the event — infer it from the text.
        tag = resolve_scalar_tag(self._event.value)
    node.span = Span.from_event(self._event)
    try:
        node.value = construct_scalar(self._event.value, tag)
    except Exception as error:
        # Report the construction error and return the node without a
        # value; error.args[0] is assumed to be the message text — TODO
        # confirm against construct_scalar's raised exceptions.
        notification = Notification.from_name('ConstructScalarError', error.args[0])
        notification.span = node.span
        self.notification_handler.report(notification)
        return node
    if node.value is None:
        # alter position of empty node (so it can be selected)
        node.span.end.column += 1
    return node
def _create_array_node(self):
    """Creates an array node.

    Consumes parse events from ``self._event`` until the matching
    ``SequenceEndEvent``, appending each resolved child to a new
    :class:`SequenceDataNode`.  Children are keyed by their positional
    index (as a string).  The node's span is computed from the sequence's
    start/end marks, or from the last child when the stream ends early.
    """
    node = SequenceDataNode()
    # Remember the sequence's opening position; end_mark is refined below.
    start_mark = self._event.start_mark
    end_mark = self._event.end_mark
    self._next_parse_event()
    while (self._event is not None and
           not isinstance(self._event, pyyaml.SequenceEndEvent)):
        # Key is the child's index within the sequence, as text.
        key = TextValue(str(len(node.children)))
        child_node = self._create_node(node)
        self._next_parse_event()
        if child_node is None:  # i.e. unresolved alias
            continue
        child_node.key = key
        node.children.append(child_node)
    if self._event is not None:
        # update end_mark when array ends correctly
        end_mark = self._event.end_mark
    elif node.children:
        # Stream ended prematurely: fall back to the last child's end
        # position.  The -1 adjustments convert back toward mark
        # coordinates — presumably spans are 1-based while marks are
        # 0-based; TODO confirm against Span.from_marks/Position.
        end_mark = node.children[-1].span.end
        end_mark.line -= 1
        end_mark.column -= 1
    node.span = Span.from_marks(start_mark, end_mark)
    return node
def tokenize(self, text):
    """Split `text` on white space and wrap each span in a `Token`.

    :param text: The string of text to tokenize
    :return: The list of tokens separated by white space.
    """
    spans = _white_space_spans(text)
    return [Token(text, Span(begin, end)) for begin, end in spans]
def test_infix(self):
    """Punctuation inside and around words splits each run into tokens."""
    sample = 'Hel?lo my 55!5! dear,est (world?'
    #         01234567890123456789012345678901
    offsets = [(0, 3), (3, 4), (4, 6), (7, 9), (10, 12), (12, 13),
               (13, 14), (14, 15), (16, 20), (20, 21), (21, 24),
               (25, 26), (26, 31), (31, 32)]
    expected = [Token(sample, Span(begin, end)) for begin, end in offsets]
    self.assertEqual(self._tokz.tokenize(sample), expected)
def test_empty(self):
    """Tokenizing the empty string."""
    sample = ''
    result = self._tokz.tokenize(sample)
    # NOTE(review): expecting a Span(0, 1) token for empty input looks
    # inconsistent with the other tests (there are no characters to
    # cover) — confirm the tokenizer really emits a token here rather
    # than an empty list.
    self.assertEqual(result[0], Token(sample, Span(0, 1)))
def test_singleword(self):
    """A single word yields one token covering the whole string."""
    sample = 'This'
    result = self._tokz.tokenize(sample)
    self.assertEqual(result[0], Token(sample, Span(0, len(sample))))
def tokenize(self, text):
    """
    Tokenize the string using white space.

    :param text: The string of text to tokenize
    :return: The list of tokens separated by white space.
    """
    # Fixed: removed the unreachable `raise NotImplementedError(...)` that
    # followed this return — the default implementation is complete, so the
    # leftover stub raise was dead code.
    return [Token(text, Span(s, e)) for s, e in _white_space_spans(text)]