def test_unicode(self):
    """Check that u-prefixed literals parse to the expected unicode values.

    Covers plain text, an embedded newline, hex and 4-digit unicode escapes,
    and finally a literal parsed with an explicit 'koi8-u' source encoding.
    """
    space = self.space

    def parsed(literal, encoding=None):
        # Parse the literal and hand back the unwrapped host-level value.
        return space.unwrap(parsestring.parsestr(space, encoding, literal))

    cases = [
        (repr(u'hello world'), u'hello world'),
        (repr(u'hello\n world'), u'hello\n world'),
        ("u'''hello\\x42 world'''", u'hello\x42 world'),
        ("u'''hello\\u0842 world'''", u'hello\u0842 world'),
    ]
    for literal, expected in cases:
        value = parsed(literal)
        assert isinstance(value, unicode)
        assert value == expected

    # Re-encode the koi8-u byte as utf-8 before handing it to the parser,
    # and compare with what the host interpreter makes of the same literal.
    utf8_literal = "u'\x81'".decode("koi8-u").encode("utf8")
    assert parsed(utf8_literal, 'koi8-u') == eval("# -*- coding: koi8-u -*-\nu'\x81'")
def test_unicode_literals(self):
    """Check result types when parsestr gets True as its fourth argument.

    Unprefixed and r-prefixed literals must come back as unicode objects,
    b-prefixed ones as bytes.  (The flag is presumably the
    unicode_literals switch -- confirm against parsestr's signature.)
    """
    space = self.space
    expectations = (
        (repr("hello"), 'w_unicode'),
        ("b'hi'", 'w_bytes'),
        ("r'hi'", 'w_unicode'),
    )
    for literal, type_attr in expectations:
        w_parsed = parsestring.parsestr(space, None, literal, True)
        # Look the type object up lazily, one case at a time.
        assert space.isinstance_w(w_parsed, getattr(space, type_attr))
def test_bytes(self):
    """b-prefixed literals, single- or triple-quoted, unwrap to "hello"."""
    space = self.space
    for literal in ("b'hello'", "b'''hello'''"):
        w_parsed = parsestring.parsestr(space, None, literal)
        assert space.unwrap(w_parsed) == "hello"
def test_unicode_literals(self):
    """Check the wrapped type produced for each literal prefix.

    Unprefixed and r-prefixed literals must be unicode objects; a
    b-prefixed literal must be an instance of space.w_str.
    """
    space = self.space
    expectations = (
        (repr("hello"), 'w_unicode'),
        ("b'hi'", 'w_str'),
        ("r'hi'", 'w_unicode'),
    )
    for literal, type_attr in expectations:
        w_parsed = parsestring.parsestr(space, None, literal)
        # Look the type object up lazily, one case at a time.
        assert space.isinstance_w(w_parsed, getattr(space, type_attr))
def build_atom(builder, nb):
    """Build the AST node for an atom and push it onto the builder.

    The atoms are fetched with get_atoms(builder, nb); the first one decides
    what is built:

    * LPAR      -> empty ast.Tuple, or the parenthesised node itself
    * LSQB      -> empty ast.List, or the inner list node (lineno patched)
    * LBRACE    -> ast.Dict built from the key/value atoms
    * NAME      -> ast.Name
    * NUMBER    -> ast.Const holding builder.eval_number(...)
    * STRING    -> ast.Const holding the parsed (and, for adjacent literals,
                   joined) string
    * BACKQUOTE -> ast.Backquote

    Any other token raises SyntaxError("unexpected tokens", lineno, col).
    """
    atoms = get_atoms(builder, nb)
    top = atoms[0]
    # NOTE(review): when the first atom is not a TokenObject nothing is
    # pushed here -- confirm that this is what callers expect.
    if isinstance(top, TokenObject):
        # assert isinstance(top, TokenObject) # rtyper
        tokens = builder.parser.tokens
        if top.name == tokens['LPAR']:
            if len(atoms) == 2:
                # just "(" and ")": the empty tuple
                builder.push(ast.Tuple([], top.lineno))
            else:
                builder.push(atoms[1])
        elif top.name == tokens['LSQB']:
            if len(atoms) == 2:
                # just "[" and "]": the empty list
                builder.push(ast.List([], top.lineno))
            else:
                list_node = atoms[1]
                # report the list at the line of its opening bracket
                list_node.lineno = top.lineno
                builder.push(list_node)
        elif top.name == tokens['LBRACE']:
            items = []
            # Atoms are laid out as: { key : value , key : value ... }
            # so keys are 4 atoms apart and each value sits 2 after its key.
            for index in range(1, len(atoms) - 1, 4):
                items.append((atoms[index], atoms[index + 2]))
            builder.push(ast.Dict(items, top.lineno))
        elif top.name == tokens['NAME']:
            val = top.get_value()
            builder.push(ast.Name(val, top.lineno))
        elif top.name == tokens['NUMBER']:
            builder.push(
                ast.Const(builder.eval_number(top.get_value()), top.lineno))
        elif top.name == tokens['STRING']:
            # need to concatenate strings in atoms
            if len(atoms) == 1:
                token = atoms[0]
                assert isinstance(token, TokenObject)
                builder.push(
                    ast.Const(
                        parsestr(builder.space, builder.source_encoding,
                                 token.get_value()),
                        top.lineno))
            else:
                # Adjacent literals: parse each one, then join the wrapped
                # pieces through the object space.
                space = builder.space
                empty = space.wrap('')
                accum = []
                for token in atoms:
                    assert isinstance(token, TokenObject)
                    accum.append(
                        parsestr(builder.space, builder.source_encoding,
                                 token.get_value()))
                w_s = space.call_method(empty, 'join', space.newlist(accum))
                builder.push(ast.Const(w_s, top.lineno))
        elif top.name == tokens['BACKQUOTE']:
            builder.push(ast.Backquote(atoms[1], atoms[1].lineno))
        else:
            raise SyntaxError("unexpected tokens", top.lineno, top.col)
def build_atom(builder, nb):
    """Push the AST node corresponding to an atom onto ``builder``.

    ``get_atoms(builder, nb)`` supplies the pieces.  The kind of the first
    piece selects the node: tuple/parenthesised expression (LPAR), list
    (LSQB), dict (LBRACE), name, number, string constant (one literal or
    several adjacent ones joined together) or backquote expression.  An
    unrecognised leading token raises
    ``SyntaxError("unexpected tokens", lineno, col)``.
    """
    atoms = get_atoms(builder, nb)
    top = atoms[0]
    # NOTE(review): a first atom that is not a TokenObject is silently
    # ignored by this function -- confirm that is intended.
    if isinstance(top, TokenObject):
        # assert isinstance(top, TokenObject) # rtyper
        if top.name == builder.parser.tokens["LPAR"]:
            if len(atoms) == 2:
                # only the two parentheses: empty tuple
                builder.push(ast.Tuple([], top.lineno))
            else:
                builder.push(atoms[1])
        elif top.name == builder.parser.tokens["LSQB"]:
            if len(atoms) == 2:
                # only the two brackets: empty list
                builder.push(ast.List([], top.lineno))
            else:
                list_node = atoms[1]
                # the list is reported at the line of its opening bracket
                list_node.lineno = top.lineno
                builder.push(list_node)
        elif top.name == builder.parser.tokens["LBRACE"]:
            items = []
            # Layout is: { key : value , key : value ... } -- step 4 from
            # one key to the next, the value is 2 atoms after its key.
            for index in range(1, len(atoms) - 1, 4):
                items.append((atoms[index], atoms[index + 2]))
            builder.push(ast.Dict(items, top.lineno))
        elif top.name == builder.parser.tokens["NAME"]:
            val = top.get_value()
            builder.push(ast.Name(val, top.lineno))
        elif top.name == builder.parser.tokens["NUMBER"]:
            builder.push(ast.Const(builder.eval_number(top.get_value()), top.lineno))
        elif top.name == builder.parser.tokens["STRING"]:
            # need to concatenate strings in atoms
            if len(atoms) == 1:
                token = atoms[0]
                assert isinstance(token, TokenObject)
                w_value = parsestr(builder.space, builder.source_encoding, token.get_value())
                builder.push(ast.Const(w_value, top.lineno))
            else:
                # Several adjacent literals: parse each, then join the
                # wrapped results via the object space.
                space = builder.space
                empty = space.wrap("")
                accum = []
                for token in atoms:
                    assert isinstance(token, TokenObject)
                    accum.append(parsestr(builder.space, builder.source_encoding, token.get_value()))
                w_s = space.call_method(empty, "join", space.newlist(accum))
                builder.push(ast.Const(w_s, top.lineno))
        elif top.name == builder.parser.tokens["BACKQUOTE"]:
            builder.push(ast.Backquote(atoms[1], atoms[1].lineno))
        else:
            raise SyntaxError("unexpected tokens", top.lineno, top.col)
def decode_string_literal(space, s, w_encoding=None):
    """Parse the source text of a string literal into a wrapped object.

    :param space: the object space used for unwrapping and parsing.
    :param s: the literal exactly as written in the source, quotes included.
    :param w_encoding: optional wrapped encoding name.  When it is omitted
        or false in the object space, the literal is parsed with no
        encoding (``None``).
    :return: whatever ``parsestr(space, encoding, s)`` returns.
    """
    from pypy.interpreter.pyparser.parsestring import parsestr
    encoding = None
    # The default is an interp-level None, not a wrapped object, so do not
    # hand it to the object space for a truth test.
    if w_encoding is not None and space.is_true(w_encoding):
        encoding = space.str_w(w_encoding)
    return parsestr(space, encoding, s)
def test_simple_enc_roundtrip(self):
    """A koi8-u byte followed by a tab escape must match the host's eval.

    The literal is first re-encoded from koi8-u to utf-8, which is the form
    handed to parsestr together with the 'koi8-u' source encoding.
    """
    space = self.space
    utf8_literal = "'\x81\\t'".decode("koi8-u").encode("utf8")
    parsed = space.unwrap(parsestring.parsestr(space, 'koi8-u', utf8_literal))
    reference = eval("# -*- coding: koi8-u -*-\n'\x81\\t'")
    assert parsed == reference
def test_simple_enc_roundtrip(self):
    """Parsing a koi8-u literal must equal the host's u'' literal.

    The source literal has no prefix, while the reference value comes from
    evaluating the same bytes as a u-prefixed literal under a koi8-u coding
    line.
    """
    space = self.space
    source = "'\x81\\t'"
    as_utf8 = source.decode("koi8-u").encode("utf8")
    w_parsed = parsestring.parsestr(space, 'koi8-u', as_utf8)
    actual = space.unwrap(w_parsed)
    assert actual == eval("# -*- coding: koi8-u -*-\nu'\x81\\t'")
def test_simple_enc_roundtrip(self):
    """A single koi8-u byte literal must match the host interpreter's eval.

    NOTE: this test used to carry a (disabled) skip with the reason
    "crashes in app_codecs, but when cheating using .encode at
    interp-level passes?!".
    """
    space = self.space
    utf8_literal = "'\x81'".decode("koi8-u").encode("utf8")
    w_parsed = parsestring.parsestr(space, 'koi8-u', utf8_literal)
    parsed = space.unwrap(w_parsed)
    reference = eval("# -*- coding: koi8-u -*-\n'\x81'")
    assert parsed == reference
def parse_and_compare(self, literal, value):
    """Parse ``literal`` (no source encoding) and compare it with ``value``.

    The host type of ``value`` selects the check: a ``unicode`` expects a
    wrapped unicode object, a ``str`` expects a wrapped str.  Any other
    type of ``value`` fails the test.
    """
    space = self.space
    w_parsed = parsestring.parsestr(space, None, literal)
    if isinstance(value, unicode):
        assert space.type(w_parsed) == space.w_unicode
        assert space.unicode_w(w_parsed) == value
        return
    if isinstance(value, str):
        assert space.type(w_parsed) == space.w_str
        assert space.str_w(w_parsed) == value
        return
    # Neither str nor unicode: the caller passed an unsupported expectation.
    assert False
def parse_and_compare(self, literal, value, encoding=None):
    """Parse ``literal`` with ``encoding`` and compare it with ``value``.

    A ``unicode`` expectation requires a wrapped unicode object; a ``str``
    expectation requires a wrapped bytes object.  Any other type of
    ``value`` fails the test.
    """
    space = self.space
    w_parsed = parsestring.parsestr(space, encoding, literal)
    if isinstance(value, unicode):
        assert space.type(w_parsed) == space.w_unicode
        assert space.unicode_w(w_parsed) == value
        return
    if isinstance(value, str):
        assert space.type(w_parsed) == space.w_bytes
        assert space.bytes_w(w_parsed) == value
        return
    # Neither str nor unicode: the caller passed an unsupported expectation.
    assert False
def parse_and_compare(self, literal, value, encoding=None):
    """Parse ``literal`` with ``encoding`` and compare it with ``value``.

    A ``unicode`` expectation requires a wrapped unicode object whose
    utf-8 content decodes to ``value``; a ``str`` expectation requires a
    wrapped bytes object.  Any other type of ``value`` fails the test.
    """
    space = self.space
    w_parsed = parsestring.parsestr(space, encoding, literal)
    if isinstance(value, unicode):
        assert space.type(w_parsed) == space.w_unicode
        utf8_payload = space.utf8_w(w_parsed)
        assert utf8_payload.decode('utf8') == value
        return
    if isinstance(value, str):
        assert space.type(w_parsed) == space.w_bytes
        assert space.str_w(w_parsed) == value
        return
    # Neither str nor unicode: the caller passed an unsupported expectation.
    assert False
def test_unicode(self):
    """Unprefixed literals must parse to the expected unicode values.

    Covers plain text, an embedded newline, hex and 4-digit unicode
    escapes, and a literal parsed with an explicit 'koi8-u' encoding.
    """
    for text in ('hello world', 'hello\n world'):
        self.parse_and_compare(repr(text), unicode(text))
    escaped_cases = (
        ("'''hello\\x42 world'''", u'hello\x42 world'),
        ("'''hello\\u0842 world'''", u'hello\u0842 world'),
    )
    for literal, expected in escaped_cases:
        self.parse_and_compare(literal, expected)

    # Re-encode as utf-8, then slice off the first character (the "u"
    # prefix) so that an unprefixed literal is what gets parsed.
    utf8_literal = "u'\x81'".decode("koi8-u").encode("utf8")
    w_parsed = parsestring.parsestr(self.space, 'koi8-u', utf8_literal[1:])
    decoded = w_parsed._utf8.decode('utf8')
    assert decoded == eval("# -*- coding: koi8-u -*-\nu'\x81'")
def test_unicode(self):
    """u-prefixed literals must parse to the expected unicode values.

    Covers plain text, an embedded newline, hex and 4-digit unicode
    escapes, and a literal parsed with an explicit "koi8-u" encoding.
    """
    space = self.space
    cases = [
        (repr(u"hello world"), u"hello world"),
        (repr(u"hello\n world"), u"hello\n world"),
        ("u'''hello\\x42 world'''", u"hello\x42 world"),
        ("u'''hello\\u0842 world'''", u"hello\u0842 world"),
    ]
    for literal, expected in cases:
        self.parse_and_compare(literal, expected)

    # The parser is given utf-8 bytes plus the declared source encoding.
    utf8_literal = "u'\x81'".decode("koi8-u").encode("utf8")
    parsed = space.unwrap(parsestring.parsestr(space, "koi8-u", utf8_literal))
    assert parsed == eval("# -*- coding: koi8-u -*-\nu'\x81'")
def test_unicode(self):
    """Exercise u-prefixed literals through parse_and_compare and parsestr.

    Plain text, an embedded newline and two escape forms go through
    parse_and_compare; the koi8-u case calls parsestr directly so that a
    source encoding can be supplied.
    """
    space = self.space
    plain_texts = (u'hello world', u'hello\n world')
    for text in plain_texts:
        self.parse_and_compare(repr(text), text)
    self.parse_and_compare("u'''hello\\x42 world'''", u'hello\x42 world')
    self.parse_and_compare("u'''hello\\u0842 world'''", u'hello\u0842 world')

    koi8_source = "u'\x81'"
    as_utf8 = koi8_source.decode("koi8-u").encode("utf8")
    w_parsed = parsestring.parsestr(space, 'koi8-u', as_utf8)
    actual = space.unwrap(w_parsed)
    assert actual == eval("# -*- coding: koi8-u -*-\nu'\x81'")
def test_simple(self): space = self.space s = 'hello world' w_ret = parsestring.parsestr(space, None, repr(s)) assert space.str_w(w_ret) == s s = 'hello\n world' w_ret = parsestring.parsestr(space, None, repr(s)) assert space.str_w(w_ret) == s s = "'''hello\\x42 world'''" w_ret = parsestring.parsestr(space, None, s) assert space.str_w(w_ret) == 'hello\x42 world' s = r'"\0"' w_ret = parsestring.parsestr(space, None, s) assert space.str_w(w_ret) == chr(0) s = r'"\07"' w_ret = parsestring.parsestr(space, None, s) assert space.str_w(w_ret) == chr(7) s = r'"\123"' w_ret = parsestring.parsestr(space, None, s) assert space.str_w(w_ret) == chr(0123) s = r'"\x"' space.raises_w(space.w_ValueError, parsestring.parsestr, space, None, s) s = r'"\x7"' space.raises_w(space.w_ValueError, parsestring.parsestr, space, None, s) s = r'"\x7g"' space.raises_w(space.w_ValueError, parsestring.parsestr, space, None, s) s = r'"\xfF"' w_ret = parsestring.parsestr(space, None, s) assert space.str_w(w_ret) == chr(0xFF) s = r'"\""' w_ret = parsestring.parsestr(space, None, s) assert space.str_w(w_ret) == '"' s = r"'\''" w_ret = parsestring.parsestr(space, None, s) assert space.str_w(w_ret) == "'"
def string_parse_literal(astbuilder, atom_node):
    """Turn the string literal children of ``atom_node`` into one AST node.

    Every child is parsed with ``parsestring.parsestr``.  Ordinary results
    are accumulated through ``add_constant_string``; ``W_FString`` results
    go through ``parse_f_string`` and switch the function into f-string
    mode.

    Returns the single collected piece in the common case (exactly one
    piece, no f-string); otherwise the result of ``f_string_to_ast_node``.

    Errors: an OperationError matching UnicodeError, ValueError or
    SyntaxError raised while parsing a child is re-reported through
    ``astbuilder.error`` positioned at that child; any other
    OperationError propagates unchanged.  A Bytes piece among several
    pieces is reported as "cannot mix bytes and nonbytes literals".
    """
    space = astbuilder.space
    encoding = astbuilder.compile_info.encoding
    joined_pieces = []
    fmode = False
    for i in range(atom_node.num_children()):
        child = atom_node.get_child(i)
        try:
            w_next = parsestring.parsestr(
                space, encoding, child.get_value(), child)
            if not isinstance(w_next, parsestring.W_FString):
                add_constant_string(astbuilder, joined_pieces,
                                    w_next, atom_node)
            else:
                parse_f_string(astbuilder, joined_pieces, w_next, atom_node)
                fmode = True
        except error.OperationError as e:
            if e.match(space, space.w_UnicodeError):
                kind = '(unicode error) '
            elif e.match(space, space.w_ValueError):
                kind = '(value error) '
            elif e.match(space, space.w_SyntaxError):
                kind = ''
            else:
                raise
            # Unicode/ValueError/SyntaxError (without position information) in
            # literal: turn into SyntaxError with position information
            e.normalize_exception(space)
            errmsg = space.text_w(space.str(e.get_w_value(space)))
            raise astbuilder.error('%s%s' % (kind, errmsg), child)

    if not fmode and len(joined_pieces) == 1:   # <= the common path
        return joined_pieces[0]   # ast.Str, Bytes or FormattedValue

    # with more than one piece, it is a combination of Str and
    # FormattedValue pieces---if there is a Bytes, then we got
    # an invalid mixture of bytes and unicode literals
    for node in joined_pieces:
        if isinstance(node, ast.Bytes):
            # Explicit ``raise``, as in the handler above, so the error
            # cannot be dropped should error() return instead of raising.
            raise astbuilder.error("cannot mix bytes and nonbytes literals",
                                   atom_node)
    assert fmode
    return f_string_to_ast_node(astbuilder, joined_pieces, atom_node)
def test_raw_unicode_literals(self): space = self.space w_ret = parsestring.parsestr(space, None, "r'\u'") assert space.int_w(space.len(w_ret)) == 2
def test_multiline_unicode_strings_with_backslash(self):
    """A triple-quoted literal holding only a line continuation is empty."""
    space = self.space
    # opening quotes, a backslash, then a newline and the closing quotes
    literal = ''.join(['"""', '\\', '\n"""'])
    parsed = space.str_w(parsestring.parsestr(space, None, literal))
    assert parsed == ''
def test_bug1(self):
    """Regression test: utf-8 bytes followed by an escape in one literal.

    The two bytes 0xc3 0xa9 must come through unchanged while the
    backslash-n that follows is still turned into a newline.
    """
    space = self.space
    two_bytes = chr(0xc3) + chr(0xa9)
    # chr(92) is the backslash that starts the escape sequence
    literal = "'" + 'x' + ' ' + two_bytes + ' ' + chr(92) + 'n' + "'"
    expected = 'x' + ' ' + two_bytes + ' ' + '\n'
    w_parsed = parsestring.parsestr(space, 'utf8', literal)
    assert space.str_w(w_parsed) == expected
def test_bug1(self):
    """Regression test: non-ASCII utf-8 bytes next to a newline escape.

    Parsing with the "utf8" source encoding must keep the bytes 0xC3 0xA9
    as they are and decode the trailing backslash-n into a newline.
    """
    space = self.space
    prefix = "x " + chr(0xC3) + chr(0xA9) + " "
    # chr(92) is a backslash; together with "n" it forms the escape
    source_literal = "".join(["'", prefix, chr(92), "n", "'"])
    w_parsed = parsestring.parsestr(space, "utf8", source_literal)
    actual = space.str_w(w_parsed)
    assert actual == prefix + "\n"