def test_deserializing_java_output_2(self):
    """Decode the stream returned by ``_get_java_output_stream(self.wd)``.

    The stream is expected to hold, in order: five variable-length ints,
    three variable-length longs, two strings readable with
    ``wu.readString`` and one value readable with ``srl.deserialize_text``.
    All numeric values are read with ``srl.deserialize_vint``.
    """
    java_bytes = _get_java_output_stream(self.wd)

    # Integers first, then longs; both kinds go through deserialize_vint.
    expected_numbers = (
        42, 4242, 424242, 42424242, -42,  # integers
        42, 424242, 4242424242,           # longs
    )
    for number in expected_numbers:
        self.assertEqual(number, srl.deserialize_vint(java_bytes))

    # Strings: plain ASCII first, then one with accented characters.
    self.assertEqual(u"hello world", wu.readString(java_bytes))
    self.assertEqual(u"oggi è giovedì", wu.readString(java_bytes))

    # The final piece is an encoded Text object.
    self.assertEqual(u"à Text object", srl.deserialize_text(java_bytes))
def test_deserializing_java_output(self):
    """Decode the stream returned by ``_get_java_output_stream``.

    A scratch directory (prefix ``pydoop_``) is created for the helper
    and removed again whether or not the assertions pass.  The stream is
    read as five VInts, three VLongs, two strings and one Text value.
    """
    scratch_dir = tempfile.mkdtemp(prefix="pydoop_")
    try:
        source = _get_java_output_stream(scratch_dir)

        # integers
        for expected in (42, 4242, 424242, 42424242, -42):
            self.assertEqual(expected, wu.readVInt(source))

        # longs
        for expected in (42, 424242, 4242424242):
            self.assertEqual(expected, wu.readVLong(source))

        # strings: plain ASCII, then accented characters
        for expected in (u"hello world", u"oggi è giovedì"):
            self.assertEqual(expected, wu.readString(source))

        # the final piece is an encoded Text object
        self.assertEqual(
            u"à Text object", srl.deserialize_text(source)
        )
    finally:
        shutil.rmtree(scratch_dir)
def __init__(self, data):
    """Populate ``filename``, ``offset`` and ``length`` by decoding *data*.

    *data* is wrapped in a ``StringIO`` and consumed in order: first the
    filename, then two longs (offset and length).  How the filename is
    decoded depends on ``has_variable_isplit_encoding()`` of the object
    returned by ``hadoop_version_info()``.
    """
    buf = StringIO(data)

    # Choose the filename decoder once, then apply it.
    if hadoop_version_info().has_variable_isplit_encoding():
        read_filename = deserialize_text
    else:
        read_filename = deserialize_old_style_filename

    self.filename = read_filename(buf)
    self.offset = deserialize_long(buf)
    self.length = deserialize_long(buf)
def test_string(self):
    """Round-trip this module's own source text through the text codec.

    The source file is decoded as UTF-8, written ``N`` times to
    ``self.stream`` with ``srl.serialize_text`` and then read back ``N``
    times with ``srl.deserialize_text``; every value read must equal the
    original text.
    """
    N = 10
    stream = self.stream

    # __file__ may name compiled bytecode.  Only a trailing ".pyc"/".pyo"
    # suffix is mapped back to ".py": a blanket replace("pyc", "py") would
    # also rewrite "pyc" occurring in a directory name and point at a
    # file that does not exist.
    test_file = __file__
    if test_file.endswith((".pyc", ".pyo")):
        test_file = test_file[:-1]

    with open(test_file) as f:
        # NOTE: `unicode` is the Python 2 builtin, as in the original.
        s = unicode(f.read(), 'utf-8')

    for _ in range(N):
        srl.serialize_text(s, stream)

    # Rewind so the values just written can be read back.
    stream.seek(0)
    for _ in range(N):
        s1 = srl.deserialize_text(stream)
        self.assertEqual(s, s1)
def test_string(self):
    """Serialize and deserialize this test module's source text N times.

    The module source is read, decoded as UTF-8, and written repeatedly
    to ``self.stream`` via ``srl.serialize_text``.  After rewinding, the
    same number of values is read with ``srl.deserialize_text`` and each
    one is compared with the original text.
    """
    N = 10
    stream = self.stream

    # Map a bytecode path back to its source by dropping the final
    # "c"/"o" of the extension only.  The previous
    # __file__.replace("pyc", "py") replaced every "pyc" substring,
    # so a directory containing "pyc" in its name broke the path.
    source_path = __file__
    is_bytecode = source_path.endswith((".pyc", ".pyo"))
    if is_bytecode:
        source_path = source_path[:-1]

    with open(source_path) as f:
        # NOTE: relies on the Python 2 `unicode` builtin, as before.
        text = unicode(f.read(), "utf-8")

    for _ in range(N):
        srl.serialize_text(text, stream)

    stream.seek(0)  # rewind before reading the values back
    for _ in range(N):
        self.assertEqual(text, srl.deserialize_text(stream))
def test_mixture(self):
    """Round-trip an interleaved list of ints, floats and strings.

    Each sample is written to ``self.stream`` with the serializer that
    matches its type; after rewinding, the samples are read back in the
    same order.  Ints and strings must compare equal, floats must agree
    to within a normalized difference of 1e-6.
    """
    buf = self.stream
    samples = [1, 0.33, 0.3290, 1902, 'sshjdhsj', 0.3, -33, 'ueiwriuqrei']

    # Write phase: pick the serializer for the sample's type, then call it.
    for item in samples:
        if isinstance(item, int):
            writer = srl.serialize_int
        elif isinstance(item, float):
            writer = srl.serialize_float
        elif isinstance(item, str):
            writer = srl.serialize_text
        else:
            continue
        writer(item, buf)

    buf.seek(0)

    # Read phase: decode in the same order and compare.
    for item in samples:
        if isinstance(item, int):
            self.assertEqual(item, srl.deserialize_int(buf))
        elif isinstance(item, float):
            decoded = srl.deserialize_float(buf)
            # Floats are compared by normalized difference, not equality.
            normalized_diff = abs(item - decoded) / abs(item + decoded)
            self.assertTrue(normalized_diff < 1e-6)
        elif isinstance(item, str):
            self.assertEqual(item, srl.deserialize_text(buf))
def test_mixture(self):
    """Write mixed ints, floats and strings to the stream and read them back.

    A small table maps each supported type to its ``srl`` serializer and
    deserializer.  Values are written in list order to ``self.stream``,
    the stream is rewound, and the values are decoded in the same order.
    Floats are checked with a normalized-difference bound of 1e-6;
    everything else is checked for equality.
    """
    stream = self.stream
    vals = [1, 0.33, 0.3290, 1902, "sshjdhsj", 0.3, -33, "ueiwriuqrei"]

    # Checked in this order, mirroring an int / float / str elif chain.
    codecs = (
        (int, srl.serialize_int, srl.deserialize_int),
        (float, srl.serialize_float, srl.deserialize_float),
        (str, srl.serialize_text, srl.deserialize_text),
    )

    def codec_for(value):
        # Return the first (type, writer, reader) entry matching value.
        for entry in codecs:
            if isinstance(value, entry[0]):
                return entry
        return None

    for value in vals:
        entry = codec_for(value)
        if entry is not None:
            entry[1](value, stream)

    stream.seek(0)

    for value in vals:
        entry = codec_for(value)
        if entry is None:
            continue
        kind, _, read = entry
        got = read(stream)
        if kind is float:
            self.assertTrue(abs(value - got) / abs(value + got) < 1e-6)
        else:
            self.assertEqual(value, got)