def _split_by(value, by, maxsplit):
    """Split ``value`` on the separator ``by`` and return the pieces.

    A non-negative ``maxsplit`` caps the number of splits; a negative
    one means "no limit".  Raises ValueError when ``by`` is empty.
    """
    if isinstance(value, unicode):
        assert isinstance(by, unicode)
    if isinstance(value, str):
        assert isinstance(by, str)
    if isinstance(value, list):
        assert isinstance(by, str)
    sep_len = len(by)
    if sep_len == 0:
        raise ValueError("empty separator")
    # XXX measure if preallocating the result list to the correct
    #     size is faster, should be

    total = len(value)
    pos = 0
    if sep_len == 1:
        # fast path: single-character separator, handled with
        # find(character) and count(character)
        by = by[0]    # annotator hack: string -> char
        remaining = count(value, by, 0, total)
        if remaining == 0:
            # separator absent: the whole value is the only piece
            if isinstance(value, str):
                return [value]
            return [value[0:total]]
        if 0 <= maxsplit < remaining:
            remaining = maxsplit
        pieces = newlist_hint(remaining + 1)
        while remaining > 0:
            hit = find(value, by, pos, total)
            assert hit >= 0  # cannot fail due to the count above
            pieces.append(value[pos:hit])
            pos = hit + sep_len
            remaining -= 1
        pieces.append(value[pos:total])
        return pieces

    # general path: multi-character separator
    if maxsplit > 0:
        pieces = newlist_hint(min(maxsplit + 1, total))
    else:
        pieces = []
    while maxsplit != 0:
        hit = find(value, by, pos, total)
        if hit < 0:
            break
        assert pos >= 0
        pieces.append(value[pos:hit])
        pos = hit + sep_len
        maxsplit -= 1   # NB. if it's already < 0, it stays < 0
    pieces.append(value[pos:total])
    return pieces
def makebytearraydata_w(space, w_source):
    """Return a list holding the byte data of ``w_source``.

    ``w_source`` is first tried through ``space.buffer_w``; if that
    fails with a TypeError it is iterated instead and every item is
    converted with ``getbytevalue``.
    """
    # String-like argument
    try:
        source_buf = space.buffer_w(w_source, space.BUF_FULL_RO)
    except OperationError as e:
        if not e.match(space, space.w_TypeError):
            raise
    else:
        return list(source_buf.as_str())

    # sequence of bytes
    w_iter = space.iter(w_source)
    expected = space.length_hint(w_source, 0)
    result = newlist_hint(expected)
    produced = 0
    while True:
        try:
            w_item = space.next(w_iter)
        except OperationError as e:
            if e.match(space, space.w_StopIteration):
                break
            raise
        result.append(getbytevalue(space, w_item))
        produced += 1
    # fewer items than the hint promised: hand the real count back
    if produced < expected:
        resizelist_hint(result, produced)
    return result
def _str_join_many_items(self, space, list_w, size):
    """Join the first ``size`` items of ``list_w`` with this string.

    Each item is classified by ``self._join_check_item``: a result of 1
    raises a TypeError, a result of 2 hands the whole job over to
    ``self._join_autoconvert``.
    """
    sep = self._val(space)
    total_len = len(sep) * (size - 1)
    parts = newlist_hint(size)
    for idx in range(size):
        w_item = list_w[idx]
        verdict = self._join_check_item(space, w_item)
        if verdict == 1:
            raise oefmt(space.w_TypeError,
                        "sequence item %d: expected string, %T found",
                        idx, w_item)
        if verdict == 2:
            return self._join_autoconvert(space, list_w)
        # XXX Maybe the extra copy here is okay? It was basically going to
        #     happen anyway, what with being placed into the builder
        part = self._op_val(space, w_item)
        parts.append(part)
        total_len += len(part)

    builder = self._builder(total_len)
    for idx in range(size):
        # the separator goes between items only, and only if non-empty
        if idx != 0 and sep:
            builder.append(sep)
        builder.append(parts[idx])
    return self._new(builder.build())
def _str_join_many_items(self, space, list_w, size):
    """Join the first ``size`` items of ``list_w`` with this string.

    Each item is unwrapped with ``self._op_val``; a TypeError raised
    there is replaced by one that names the offending item index.
    """
    sep = self._val(space)
    total_len = len(sep) * (size - 1)
    parts = newlist_hint(size)
    for idx in range(size):
        w_item = list_w[idx]
        try:
            part = self._op_val(space, w_item)
        except OperationError as e:
            if e.match(space, space.w_TypeError):
                raise oefmt(space.w_TypeError,
                            "sequence item %d: expected %s, %T found",
                            idx, self._generic_name(), w_item)
            raise
        # XXX Maybe the extra copy here is okay? It was basically going to
        #     happen anyway, what with being placed into the builder
        parts.append(part)
        total_len += len(part)

    builder = self._builder(total_len)
    for idx in range(size):
        # the separator goes between items only, and only if non-empty
        if idx != 0 and sep:
            builder.append(sep)
        builder.append(parts[idx])
    return self._new(builder.build())
def _rsplit_by(value, by, maxsplit):
    """Split ``value`` on the separator ``by``, scanning from the right.

    A non-negative ``maxsplit`` caps the number of splits; a negative
    one means "no limit".  Raises ValueError when ``by`` is empty.
    The pieces are returned in left-to-right order.
    """
    if isinstance(value, unicode):
        assert isinstance(by, unicode)
    if isinstance(value, str):
        assert isinstance(by, str)
    if isinstance(value, list):
        assert isinstance(by, str)
    if maxsplit > 0:
        pieces = newlist_hint(min(maxsplit + 1, len(value)))
    else:
        pieces = []
    stop = len(value)
    sep_len = len(by)
    if sep_len == 0:
        raise ValueError("empty separator")

    while maxsplit != 0:
        hit = rfind(value, by, 0, stop)
        if hit < 0:
            break
        pieces.append(value[hit + sep_len:stop])
        stop = hit
        maxsplit -= 1   # NB. if it's already < 0, it stays < 0

    # pieces were collected right-to-left; add the head and flip
    pieces.append(value[:stop])
    pieces.reverse()
    return pieces
def rsplit(value, by, maxsplit=-1):
    """Split ``value`` on the separator ``by``, scanning from the right.

    A non-negative ``maxsplit`` caps the number of splits; a negative
    one (the default) means "no limit".  Raises ValueError when ``by``
    is empty.  The pieces are returned in left-to-right order.
    """
    if isinstance(value, str):
        assert isinstance(by, str)
    else:
        assert isinstance(by, unicode)
    if maxsplit > 0:
        pieces = newlist_hint(min(maxsplit + 1, len(value)))
    else:
        pieces = []
    stop = len(value)
    sep_len = len(by)
    if sep_len == 0:
        raise ValueError("empty separator")

    while maxsplit != 0:
        hit = value.rfind(by, 0, stop)
        if hit < 0:
            break
        pieces.append(value[hit + sep_len:stop])
        stop = hit
        maxsplit -= 1   # NB. if it's already < 0, it stays < 0

    # pieces were collected right-to-left; add the head and flip
    pieces.append(value[:stop])
    pieces.reverse()
    return pieces
def rsplit(value, by=None, maxsplit=-1):
    """Split ``value`` from the right.

    With ``by`` left as None, words are delimited by runs of characters
    for which ``_isspace`` is true.  Otherwise ``by`` is the separator
    and must be non-empty (ValueError if it is empty).  A non-negative
    ``maxsplit`` caps the number of splits; a negative one means "no
    limit".  The pieces are returned in left-to-right order.
    """
    if by is None:
        words = []
        hi = len(value) - 1
        while True:
            # starting from the end, skip whitespace to reach the last
            # character of the next word
            while hi >= 0 and _isspace(value[hi]):
                hi -= 1
            if hi < 0:
                break   # start of string reached, finished

            # find the start of the word
            # (more precisely, 'lo' is the space character before it)
            if maxsplit == 0:
                lo = -1   # take all the rest of the string
            else:
                lo = hi - 1
                while lo >= 0 and not _isspace(value[lo]):
                    lo -= 1
                maxsplit -= 1   # NB. if it's already < 0, it stays < 0

            # the word is value[lo+1:hi+1]
            word_start = lo + 1
            assert word_start >= 0
            words.append(value[word_start:hi + 1])

            # resume from the character before the space before the word
            hi = lo - 1
        words.reverse()
        return words

    if isinstance(value, str):
        assert isinstance(by, str)
    else:
        assert isinstance(by, unicode)
    if maxsplit > 0:
        pieces = newlist_hint(min(maxsplit + 1, len(value)))
    else:
        pieces = []
    stop = len(value)
    sep_len = len(by)
    if sep_len == 0:
        raise ValueError("empty separator")

    while maxsplit != 0:
        hit = value.rfind(by, 0, stop)
        if hit < 0:
            break
        pieces.append(value[hit + sep_len:stop])
        stop = hit
        maxsplit -= 1   # NB. if it's already < 0, it stays < 0

    pieces.append(value[:stop])
    pieces.reverse()
    return pieces
def create_numbering(self):
    """Flatten ``self.current`` into a freshly malloc'ed NUMBERING.

    Every item is expanded by ``append_numbering`` into a temporary
    list, whose elements are then copied into the ``code`` array.
    """
    # NOTE(review): the hint of 3 entries per item is taken as-is from
    # the original code; its rationale is not visible here.
    flat = objectmodel.newlist_hint(len(self.current) * 3)
    for entry in self.current:
        append_numbering(flat, entry)

    count = len(flat)
    numbering = lltype.malloc(NUMBERING, count)
    for idx in range(count):
        numbering.code[idx] = flat[idx]
    return numbering
def w_keys(self, w_dict):
    """Return a wrapped list of the keys of ``w_dict``.

    Keys are pulled from ``self.iterkeys`` until it yields None.
    """
    key_iter = self.iterkeys(w_dict)
    keys_w = newlist_hint(self.length(w_dict))
    w_key = key_iter.next_key()
    while w_key is not None:
        keys_w.append(w_key)
        w_key = key_iter.next_key()
    return self.space.newlist(keys_w)
def values(self, w_dict):
    """Return an (unwrapped) list of the values of ``w_dict``.

    Values are pulled from ``self.itervalues`` until it yields None.
    """
    value_iter = self.itervalues(w_dict)
    values_w = newlist_hint(self.length(w_dict))
    w_value = value_iter.next_value()
    while w_value is not None:
        values_w.append(w_value)
        w_value = value_iter.next_value()
    return values_w
def items(self, w_dict):
    """Return a list of wrapped (key, value) tuples for ``w_dict``.

    Pairs are pulled from ``self.iteritems`` until the key is None.
    """
    item_iter = self.iteritems(w_dict)
    pairs_w = newlist_hint(self.length(w_dict))
    while True:
        w_key, w_value = item_iter.next_item()
        if w_key is None:
            return pairs_w
        w_pair = self.space.newtuple([w_key, w_value])
        pairs_w.append(w_pair)
def method_plus(self, space, w_obj):
    """Return a new string made of ``self`` followed by ``w_obj``.

    ``w_obj`` is first converted to a string through ``to_str``.  The
    result is built on storage hinted to the combined length, and
    ``space.infect`` is applied to it for both operands.
    """
    w_rhs = space.convert_type(w_obj, space.w_string, "to_str")
    assert isinstance(w_rhs, W_StringObject)
    combined_len = self.length() + w_rhs.length()
    chars = newlist_hint(combined_len)
    w_result = space.newstr_fromchars(chars)
    w_result.extend(space, self)
    w_result.extend(space, w_rhs)
    space.infect(w_result, self)
    space.infect(w_result, w_rhs)
    return w_result
def newstr_fromstrs(space, strs_w):
    """Concatenate the string objects in ``strs_w`` into a new string.

    A first pass sums the lengths so the character storage can be
    hinted to its final size; a second pass copies each string into it.
    """
    needed = 0
    for w_str in strs_w:
        assert isinstance(w_str, W_StringObject)
        needed += w_str.length()

    chars = newlist_hint(needed)
    for w_str in strs_w:
        assert isinstance(w_str, W_StringObject)
        strategy = w_str.strategy
        strategy.extend_into(w_str.str_storage, chars)
    return space.newstr_fromchars(chars)
def split(value, by, maxsplit=-1):
    """Split ``value`` on the separator ``by`` and return the pieces.

    A non-negative ``maxsplit`` caps the number of splits; a negative
    one (the default) means "no limit".  Raises ValueError when ``by``
    is empty.
    """
    if isinstance(value, str):
        assert isinstance(by, str)
    else:
        assert isinstance(by, unicode)
    sep_len = len(by)
    if sep_len == 0:
        raise ValueError("empty separator")

    pos = 0
    if sep_len == 1:
        # fast path: single-character separator, handled with
        # str.find(character) and str.count(character)
        by = by[0]    # annotator hack: string -> char
        remaining = value.count(by)
        if 0 <= maxsplit < remaining:
            remaining = maxsplit
        pieces = newlist_hint(remaining + 1)
        while remaining > 0:
            hit = value.find(by, pos)
            assert hit >= 0  # cannot fail due to the value.count above
            pieces.append(value[pos:hit])
            pos = hit + sep_len
            remaining -= 1
        pieces.append(value[pos:len(value)])
        return pieces

    # general path: multi-character separator
    if maxsplit > 0:
        pieces = newlist_hint(min(maxsplit + 1, len(value)))
    else:
        pieces = []
    while maxsplit != 0:
        hit = value.find(by, pos)
        if hit < 0:
            break
        pieces.append(value[pos:hit])
        pos = hit + sep_len
        maxsplit -= 1   # NB. if it's already < 0, it stays < 0
    pieces.append(value[pos:len(value)])
    return pieces
def _from_byte_sequence(space, w_source):
    """Collect the items of ``w_source`` into a list via the loop helper.

    The list is hinted with ``space.length_hint``; if the helper ends up
    producing fewer items, the hint is corrected afterwards.
    """
    # Split off in a separate function for the JIT's benefit
    # and add a jitdriver with the type of w_iter as the green key
    w_iter = space.iter(w_source)
    expected = space.length_hint(w_source, 0)
    result = newlist_hint(expected)

    _from_byte_sequence_loop(space, w_iter, result)

    produced = len(result)
    if not produced >= expected:
        resizelist_hint(result, produced)
    return result
def get_class_methods(interp, w_obj):
    '''Return an array of the class's method names, or Null.

    Null is returned when ``_get_class`` finds no class for ``w_obj``.
    The names come from ``klass.get_methods`` for the current context
    class.
    '''
    klass = _get_class(interp, w_obj)
    space = interp.space
    if not klass:
        return space.w_Null

    visible = klass.get_methods(interp.get_contextclass())
    # the size hint is taken from klass.methods, as in the original
    names_w = newlist_hint(len(klass.methods))
    for name in visible:
        names_w.append(space.newstr(name))
    return space.new_array_from_list(names_w)
def read_list(stream, end):
    """Read list elements from ``stream`` up to the closing paren.

    ``end`` is passed to ``check_matches`` together with the closing
    token's text.  A dot token makes the following datum the tail of an
    improper list and must be followed directly by the closing paren;
    otherwise SchemeException is raised.
    """
    elements = newlist_hint(8)
    while True:
        token = read_token(stream)

        if token is dot_token:
            tail = read_stream(stream)
            closer = read_token(stream)
            if not isinstance(closer, RParenToken):
                raise SchemeException("read: illegal use of `.`")
            check_matches(end, closer.str)
            return to_improper(elements, tail)

        if isinstance(token, RParenToken):
            check_matches(end, token.str)
            return to_improper(elements, values.w_null)

        if isinstance(token, LParenToken):
            # nested list: recurse with the opener's text
            element = read_list(stream, token.str)
        elif isinstance(token, SpecialToken):
            element = token.finish(read_stream(stream))
        else:
            assert isinstance(token, values.W_Object)
            element = token
        elements.append(element)
def f(i):
    """Count ``i`` down to zero, allocating a hinted list per iteration."""
    while i > 0:
        driver.jit_merge_point(i=i)
        scratch = newlist_hint(5)
        scratch.append(1)
        # decrement by the element just stored
        i = i - scratch[0]
def f(i):
    """Store into index 0 of a list hinted to ``i`` and return its length."""
    hinted = newlist_hint(i)
    # indexed store on the freshly created list
    hinted[0] = 55
    return len(hinted)
def split(value, by=None, maxsplit=-1):
    """Split ``value`` from the left.

    With ``by`` left as None, words are delimited by runs of characters
    for which ``_isspace`` is true.  Otherwise ``by`` is the separator
    and must be non-empty (ValueError if it is empty).  A non-negative
    ``maxsplit`` caps the number of splits; a negative one means "no
    limit".
    """
    if by is None:
        total = len(value)
        lo = 0
        words = []
        while True:
            # skip whitespace to reach the beginning of the next word
            while lo < total and _isspace(value[lo]):
                lo += 1
            if lo >= total:
                break   # end of string, finished

            # find the end of the word
            if maxsplit == 0:
                hi = total   # take all the rest of the string
            else:
                hi = lo + 1
                while hi < total and not _isspace(value[hi]):
                    hi += 1
                maxsplit -= 1   # NB. if it's already < 0, it stays < 0

            # the word is value[lo:hi]
            words.append(value[lo:hi])

            # resume from the character following the space after the word
            lo = hi + 1
        return words

    if isinstance(value, unicode):
        assert isinstance(by, unicode)
    if isinstance(value, str):
        assert isinstance(by, str)
    if isinstance(value, list):
        assert isinstance(by, str)
    sep_len = len(by)
    if sep_len == 0:
        raise ValueError("empty separator")

    pos = 0
    if sep_len == 1:
        # fast path: single-character separator, handled with
        # find(character) and count(character)
        by = by[0]    # annotator hack: string -> char
        remaining = count(value, by, 0, len(value))
        if 0 <= maxsplit < remaining:
            remaining = maxsplit
        pieces = newlist_hint(remaining + 1)
        while remaining > 0:
            hit = find(value, by, pos, len(value))
            assert hit >= 0  # cannot fail due to the count above
            pieces.append(value[pos:hit])
            pos = hit + sep_len
            remaining -= 1
        pieces.append(value[pos:len(value)])
        return pieces

    # general path: multi-character separator
    if maxsplit > 0:
        pieces = newlist_hint(min(maxsplit + 1, len(value)))
    else:
        pieces = []
    while maxsplit != 0:
        hit = find(value, by, pos, len(value))
        if hit < 0:
            break
        assert pos >= 0
        pieces.append(value[pos:hit])
        pos = hit + sep_len
        maxsplit -= 1   # NB. if it's already < 0, it stays < 0
    pieces.append(value[pos:len(value)])
    return pieces
def f(z):
    """Return the length of a new list created with size hint ``z``."""
    return len(newlist_hint(sizehint=z))
def __init__(self, size):
    """Create the ``handles`` list with ``size`` as its size hint."""
    handles = newlist_hint(size)
    self.handles = handles
def makebytearraydata_w(space, w_source):
    """Return a list holding the byte data of ``w_source``.

    ``w_source`` is first tried through ``space.bufferstr_new_w``; if
    that fails with a TypeError it is iterated instead and every item
    is converted with ``getbytevalue``.  Any other error propagates.
    """
    # String-like argument
    try:
        string = space.bufferstr_new_w(w_source)
    except OperationError as e:  # 'as' form parses on Python 2.6+ and 3
        if not e.match(space, space.w_TypeError):
            raise
    else:
        # plain copy of the characters into a list
        return list(string)

    # sequence of bytes
    w_iter = space.iter(w_source)
    length_hint = space.length_hint(w_source, 0)
    data = newlist_hint(length_hint)
    extended = 0
    while True:
        try:
            w_item = space.next(w_iter)
        except OperationError as e:
            if not e.match(space, space.w_StopIteration):
                raise
            break
        value = getbytevalue(space, w_item)
        data.append(value)
        extended += 1
    # fewer items than the hint promised: hand the real count back
    if extended < length_hint:
        resizelist_hint(data, extended)
    return data
def rsplit(value, by=None, maxsplit=-1):
    """Split ``value`` from the right.

    With ``by`` left as None, words are delimited by runs of characters
    for which ``_isspace`` is true.  Otherwise ``by`` is the separator
    and must be non-empty (ValueError if it is empty).  A non-negative
    ``maxsplit`` caps the number of splits; a negative one means "no
    limit".  The pieces are returned in left-to-right order.
    """
    if by is None:
        words = []
        hi = len(value) - 1
        while True:
            # starting from the end, skip whitespace to reach the last
            # character of the next word
            while hi >= 0 and _isspace(value[hi]):
                hi -= 1
            if hi < 0:
                break   # start of string reached, finished

            # find the start of the word
            # (more precisely, 'lo' is the space character before it)
            if maxsplit == 0:
                lo = -1   # take all the rest of the string
            else:
                lo = hi - 1
                while lo >= 0 and not _isspace(value[lo]):
                    lo -= 1
                maxsplit -= 1   # NB. if it's already < 0, it stays < 0

            # the word is value[lo+1:hi+1]
            word_start = lo + 1
            assert word_start >= 0
            words.append(value[word_start:hi + 1])

            # resume from the character before the space before the word
            hi = lo - 1
        words.reverse()
        return words

    if isinstance(value, unicode):
        assert isinstance(by, unicode)
    if isinstance(value, str):
        assert isinstance(by, str)
    if isinstance(value, list):
        assert isinstance(by, str)
    if maxsplit > 0:
        pieces = newlist_hint(min(maxsplit + 1, len(value)))
    else:
        pieces = []
    stop = len(value)
    sep_len = len(by)
    if sep_len == 0:
        raise ValueError("empty separator")

    while maxsplit != 0:
        hit = rfind(value, by, 0, stop)
        if hit < 0:
            break
        pieces.append(value[hit + sep_len:stop])
        stop = hit
        maxsplit -= 1   # NB. if it's already < 0, it stays < 0

    pieces.append(value[:stop])
    pieces.reverse()
    return pieces
def __init__(self, init_size=INIT_SIZE):
    """Create the backing list ``self.l`` with ``init_size`` as hint."""
    backing = newlist_hint(init_size)
    self.l = backing
def f(i):
    """Extend a hinted list with ``strings[i]`` and join it back."""
    text = strings[i]
    chars = newlist_hint(sizehint=13)
    # in-place extension of the list with the string
    chars += text
    return ''.join(chars)
def f(z):
    """Return the length of a hinted list, after one append if z < 0."""
    items = newlist_hint(sizehint=38)
    if z < 0:
        items.append(1)
    size = len(items)
    return size
def __init__(self, size=0):
    """Create the ``current`` list with ``size`` as its size hint."""
    current = objectmodel.newlist_hint(size)
    self.current = current
def __init__(self, init_size=INIT_SIZE):
    """Create the backing list ``self.l`` with ``init_size`` as hint.

    ``init_size`` is asserted to be non-negative.
    """
    assert init_size >= 0
    backing = newlist_hint(init_size)
    self.l = backing
def next_w(self):
    """Parse and return the next record as a wrapped list.

    Lines are pulled from ``self.w_iter`` and fed character by character
    through a state machine.  More than one line is consumed when a
    record continues past a line end (inside a quoted field or after an
    escaped line break).

    When the iterator is exhausted the StopIteration is re-raised,
    unless a field is still pending: then the field is saved and the
    record returned, or, if ``dialect.strict`` is set, an
    "unexpected end of data" error is raised.

    Side effects visible here: ``self.line_num`` is incremented per
    line, ``self.numeric_field`` is reset, and ``self.sizehint`` is set
    to the number of fields in the record just read.
    """
    space = self.space
    dialect = self.dialect
    # NOTE(review): presumably save_field() appends to self.fields_w;
    # its body is not visible here -- confirm.
    self.fields_w = objectmodel.newlist_hint(self.sizehint)
    self.numeric_field = False
    field_builder = None  # valid iff state not in [START_RECORD, EAT_CRNL]
    state = START_RECORD
    #
    while True:
        try:
            w_line = space.next(self.w_iter)
        except OperationError as e:
            if e.match(space, space.w_StopIteration):
                # input exhausted: only a pending field (non-empty, or
                # an unterminated quoted one) needs special handling
                if (field_builder is not None and
                        state != START_RECORD and state != EAT_CRNL and
                        (len(field_builder.build()) > 0 or
                         state == IN_QUOTED_FIELD)):
                    if dialect.strict:
                        raise self.error(u"unexpected end of data")
                    else:
                        self.save_field(field_builder)
                        break
            # everything else, including a plain StopIteration,
            # propagates to the caller
            raise
        self.line_num += 1
        if space.isinstance_w(w_line, space.w_bytes):
            raise self.error(u"iterator should return strings, not bytes "
                             u"(did you open the file in text mode?")
        line = space.utf8_w(w_line)
        for c in Utf8StringIterator(line):
            # c is compared against ord() values throughout, i.e. it is
            # an integer code point
            if c == 0:
                raise self.error(u"line contains NULL byte")

            if state == START_RECORD:
                if c == ord(u'\n') or c == ord(u'\r'):
                    state = EAT_CRNL
                    continue
                # normal character - handle as START_FIELD
                state = START_FIELD
                # fall-through to the next case

            # NB: a plain 'if', not 'elif', so that the START_RECORD
            # case above can fall through into it
            if state == START_FIELD:
                field_builder = Utf8StringBuilder(64)
                # expecting field
                if c == ord(u'\n') or c == ord(u'\r'):
                    # save empty field
                    self.save_field(field_builder)
                    state = EAT_CRNL
                elif (c == dialect.quotechar and
                          dialect.quoting != QUOTE_NONE):
                    # start quoted field
                    state = IN_QUOTED_FIELD
                elif c == dialect.escapechar:
                    # possible escaped character
                    state = ESCAPED_CHAR
                elif c == ord(u' ') and dialect.skipinitialspace:
                    # ignore space at start of field
                    pass
                elif c == dialect.delimiter:
                    # save empty field
                    self.save_field(field_builder)
                else:
                    # begin new unquoted field
                    if dialect.quoting == QUOTE_NONNUMERIC:
                        self.numeric_field = True
                    self.add_char(field_builder, c)
                    state = IN_FIELD

            elif state == ESCAPED_CHAR:
                # the escaped character is added either way; only the
                # follow-up state differs for a line break
                if c == ord(u'\n') or c == ord(u'\r'):
                    self.add_char(field_builder, c)
                    state = AFTER_ESCAPED_CRNL
                else:
                    self.add_char(field_builder, c)
                    state = IN_FIELD

            elif state == IN_FIELD or state == AFTER_ESCAPED_CRNL:
                # in unquoted field
                if c == ord(u'\n') or c == ord(u'\r'):
                    # end of line
                    self.save_field(field_builder)
                    state = EAT_CRNL
                elif c == dialect.escapechar:
                    # possible escaped character
                    state = ESCAPED_CHAR
                elif c == dialect.delimiter:
                    # save field - wait for new field
                    self.save_field(field_builder)
                    state = START_FIELD
                else:
                    # normal character - save in field
                    self.add_char(field_builder, c)

            elif state == IN_QUOTED_FIELD:
                # in quoted field
                if c == dialect.escapechar:
                    # Possible escape character
                    state = ESCAPE_IN_QUOTED_FIELD
                elif (c == dialect.quotechar and
                          dialect.quoting != QUOTE_NONE):
                    if dialect.doublequote:
                        # doublequote; " represented by ""
                        state = QUOTE_IN_QUOTED_FIELD
                    else:
                        # end of quote part of field
                        state = IN_FIELD
                else:
                    # normal character - save in field
                    self.add_char(field_builder, c)

            elif state == ESCAPE_IN_QUOTED_FIELD:
                self.add_char(field_builder, c)
                state = IN_QUOTED_FIELD

            elif state == QUOTE_IN_QUOTED_FIELD:
                # doublequote - seen a quote in a quoted field
                if (dialect.quoting != QUOTE_NONE and
                        c == dialect.quotechar):
                    # save "" as "
                    self.add_char(field_builder, c)
                    state = IN_QUOTED_FIELD
                elif c == dialect.delimiter:
                    # save field - wait for new field
                    self.save_field(field_builder)
                    state = START_FIELD
                elif c == ord(u'\n') or c == ord(u'\r'):
                    # end of line
                    self.save_field(field_builder)
                    state = EAT_CRNL
                elif not dialect.strict:
                    self.add_char(field_builder, c)
                    state = IN_FIELD
                else:
                    # illegal
                    raise self.error(u"'%s' expected after '%s'" % (unichr(
                        dialect.delimiter), unichr(dialect.quotechar)))

            elif state == EAT_CRNL:
                # after a line end only further line-break characters
                # are accepted
                if not (c == ord(u'\n') or c == ord(u'\r')):
                    raise self.error(
                        u"new-line character seen in unquoted "
                        u"field - do you need to open the file "
                        u"in universal-newline mode?")

        # end of this line: either the record is complete (break) or
        # it continues on the next line
        if state == IN_FIELD or state == QUOTE_IN_QUOTED_FIELD:
            self.save_field(field_builder)
            break
        elif state == ESCAPED_CHAR:
            # line ended right after the escape character: a newline
            # is added to the field and parsing goes on
            self.add_char(field_builder, ord(u'\n'))
            state = IN_FIELD
        elif state == IN_QUOTED_FIELD:
            # quoted field still open: keep reading lines
            pass
        elif state == ESCAPE_IN_QUOTED_FIELD:
            self.add_char(field_builder, ord(u'\n'))
            state = IN_QUOTED_FIELD
        elif state == START_FIELD:
            # save empty field
            field_builder = Utf8StringBuilder()
            self.save_field(field_builder)
            break
        elif state == AFTER_ESCAPED_CRNL:
            continue
        else:
            break
    #
    w_result = space.newlist(self.fields_w)
    # assume all lines have the same number of fields
    self.sizehint = len(self.fields_w)
    self.fields_w = None
    return w_result
buf = space.buffer_w(w_source, space.BUF_FULL_RO) except OperationError, e: if not e.match(space, space.w_TypeError): raise else: return [c for c in buf.as_str()] if space.isinstance_w(w_source, space.w_unicode): raise OperationError( space.w_TypeError, space.wrap("cannot convert unicode object to bytes")) # sequence of bytes w_iter = space.iter(w_source) length_hint = space.length_hint(w_source, 0) data = newlist_hint(length_hint) extended = 0 while True: try: w_item = space.next(w_iter) except OperationError, e: if not e.match(space, space.w_StopIteration): raise break value = getbytevalue(space, w_item) data.append(value) extended += 1 if extended < length_hint: resizelist_hint(data, extended) return data