예제 #1
0
 def wu_helper(self, data):
     """Serialize ``data`` item by item and return the resulting bytes.

     Items that are ``int`` are written with ``srl.serialize_vint``;
     items that are ``basestring`` are written with ``wu.writeString``.
     Anything else is silently skipped.
     """
     sink = StringIO()
     for item in data:
         if isinstance(item, int):
             srl.serialize_vint(item, sink)
             continue
         if isinstance(item, basestring):
             wu.writeString(sink, item)
     serialized = sink.getvalue()
     return serialized
예제 #2
0
 def wu_helper(self, data):
     """Serialize ``data`` item by item and return the resulting bytes.

     Items that are ``int`` are written with ``srl.serialize_vint``;
     items that are ``basestring`` are written with ``wu.writeString``.
     Anything else is silently skipped.
     """
     sink = StringIO()
     for item in data:
         if isinstance(item, int):
             srl.serialize_vint(item, sink)
             continue
         if isinstance(item, basestring):
             wu.writeString(sink, item)
     serialized = sink.getvalue()
     return serialized
예제 #3
0
 def to_string(cls, filename, offset, length):
     """Encode ``filename``, ``offset`` and ``length`` into one string.

     The filename is written with ``serialize_text`` when
     ``hadoop_version_info().has_variable_isplit_encoding()`` is true and
     with ``serialize_old_style_filename`` otherwise; ``offset`` and
     ``length`` follow, each written with ``serialize_long``.
     """
     buf = StringIO()
     write_filename = (
         serialize_text
         if hadoop_version_info().has_variable_isplit_encoding()
         else serialize_old_style_filename
     )
     write_filename(filename, buf)
     for number in (offset, length):
         serialize_long(number, buf)
     return buf.getvalue()
예제 #4
0
 def __init__(self, data):
     """Populate ``filename``, ``offset`` and ``length`` from ``data``.

     The filename is read with ``deserialize_text`` when
     ``hadoop_version_info().has_variable_isplit_encoding()`` is true and
     with ``deserialize_old_style_filename`` otherwise; ``offset`` and
     ``length`` are then read, in that order, with ``deserialize_long``.
     """
     source = StringIO(data)
     read_filename = (
         deserialize_text
         if hadoop_version_info().has_variable_isplit_encoding()
         else deserialize_old_style_filename
     )
     self.filename = read_filename(source)
     self.offset = deserialize_long(source)
     self.length = deserialize_long(source)
예제 #5
0
def _get_java_output_stream(wd):
    """Compile and run the Java serialization class, returning its stdout.

    The ``.java`` source that sits next to this module is copied into
    ``wd``, compiled via ``_compile_java_part`` and executed with ``wd`` as
    the working directory.

    :param wd: directory used for the copied source, the compiled class and
        as the working directory of the Java process.
    :return: a ``StringIO`` wrapping everything the Java process wrote to
        standard output.
    :raises subprocess.CalledProcessError: if the Java process exits with a
        non-zero status.
    """
    this_directory = os.path.abspath(os.path.dirname(__file__))
    src = os.path.join(this_directory, "%s.java" % _HADOOP_SERIALIZE_CLASS)
    shutil.copy(src, wd)
    classpath = '.:%s:%s' % (pydoop.hadoop_classpath(), wd)
    filename_root = os.path.join(wd, _HADOOP_SERIALIZE_CLASS)
    _compile_java_part(filename_root + ".class", classpath)
    # Java's stderr is discarded; the handle is opened in a ``with`` block so
    # it is closed even when check_output raises (it used to be leaked).
    with open('/dev/null', 'w') as devnull:
        output = subprocess.check_output(
            [JAVA, '-cp', classpath, _HADOOP_SERIALIZE_CLASS],
            cwd=wd,
            stderr=devnull)
    return StringIO(output)
예제 #6
0
 def test_simulate_java_output_1(self):
     """Write the reference values in Python and compare with the Java bytes.

     The expected bytes come from ``_get_java_output_stream``; the Python
     side writes the same values with ``srl.serialize_vint``,
     ``wu.writeString`` and ``srl.serialize_text``.
     """
     java_bytes = _get_java_output_stream(self.wd)
     py_bytes = StringIO()
     # integers
     for number in (42, 4242, 424242, 42424242, -42):
         srl.serialize_vint(number, py_bytes)
     # longs go through the same vint writer
     for number in (42, 424242, 4242424242):
         srl.serialize_vint(number, py_bytes)
     # strings: plain ASCII first, then one with accented characters
     for text in (u"hello world", u"oggi è giovedì"):
         wu.writeString(py_bytes, text)
     # the last item is written with serialize_text
     srl.serialize_text(u"à Text object", py_bytes)
     self.assertEqual(java_bytes.getvalue(), py_bytes.getvalue())
예제 #7
0
 def test_simulate_java_output_1(self):
     """Write the reference values in Python and compare with the Java bytes.

     The expected bytes come from ``_get_java_output_stream``; the Python
     side writes the same values with ``srl.serialize_vint``,
     ``wu.writeString`` and ``srl.serialize_text``.
     """
     java_bytes = _get_java_output_stream(self.wd)
     py_bytes = StringIO()
     # integers
     for number in (42, 4242, 424242, 42424242, -42):
         srl.serialize_vint(number, py_bytes)
     # longs go through the same vint writer
     for number in (42, 424242, 4242424242):
         srl.serialize_vint(number, py_bytes)
     # strings: plain ASCII first, then one with accented characters
     for text in (u"hello world", u"oggi è giovedì"):
         wu.writeString(py_bytes, text)
     # the last item is written with serialize_text
     srl.serialize_text(u"à Text object", py_bytes)
     self.assertEqual(java_bytes.getvalue(), py_bytes.getvalue())
예제 #8
0
 def setUp(self):
     """Give the test an empty in-memory stream and a fresh scratch dir.

     The directory is created with ``tempfile.mkdtemp`` using the
     ``pydoop_`` prefix and stored in ``self.wd``.
     """
     self.stream = StringIO()
     scratch_dir = tempfile.mkdtemp(prefix="pydoop_")
     self.wd = scratch_dir
예제 #9
0
class TestSerialize(unittest.TestCase):
    """Checks for the ``srl`` and ``wu`` serialization helpers.

    Most tests use the bytes returned by ``_get_java_output_stream`` as the
    reference: five ints, three longs, two strings and one Text object.
    """

    def setUp(self):
        # Each test starts with an empty stream and its own scratch directory.
        self.stream = StringIO()
        self.wd = tempfile.mkdtemp(prefix="pydoop_")

    def tearDown(self):
        # Remove the scratch directory created in setUp.
        shutil.rmtree(self.wd)

    def test_deserializing_java_output_1(self):
        """Decode the Java output with the ``wu`` readers."""
        java_bytes = _get_java_output_stream(self.wd)
        # integers
        for expected in (42, 4242, 424242, 42424242, -42):
            self.assertEqual(expected, wu.readVInt(java_bytes))
        # longs
        for expected in (42, 424242, 4242424242):
            self.assertEqual(expected, wu.readVLong(java_bytes))
        # strings: plain ASCII first, then one with accented characters
        for expected in (u"hello world", u"oggi è giovedì"):
            self.assertEqual(expected, wu.readString(java_bytes))
        # final piece is an encoded Text object
        self.assertEqual(u"à Text object", srl.deserialize_text(java_bytes))

    def test_deserializing_java_output_2(self):
        """Decode the Java output, reading numbers with ``srl``."""
        java_bytes = _get_java_output_stream(self.wd)
        # integers
        for expected in (42, 4242, 424242, 42424242, -42):
            self.assertEqual(expected, srl.deserialize_vint(java_bytes))
        # longs are read with the same vint reader
        for expected in (42, 424242, 4242424242):
            self.assertEqual(expected, srl.deserialize_vint(java_bytes))
        # strings: plain ASCII first, then one with accented characters
        for expected in (u"hello world", u"oggi è giovedì"):
            self.assertEqual(expected, wu.readString(java_bytes))
        # final piece is an encoded Text object
        self.assertEqual(u"à Text object", srl.deserialize_text(java_bytes))

    def test_deserializing_java_output_3(self):
        """Dump the Java output to a file and decode it via ``FlowReader``."""
        java_bytes = _get_java_output_stream(self.wd)
        dump_path = os.path.join(self.wd, 'foo.dat')
        with open(dump_path, 'wb') as sink:
            sink.write(java_bytes.getvalue())
        with srl.FlowReader(open(dump_path, 'rb')) as flow:
            # integers
            for expected in (42, 4242, 424242, 42424242, -42):
                self.assertEqual(expected, flow.read("i")[0])
            # longs
            for expected in (42, 424242, 4242424242):
                self.assertEqual(expected, flow.read("L")[0])
            # strings: plain ASCII first, then one with accented characters
            for expected in (u"hello world", u"oggi è giovedì"):
                self.assertEqual(expected, flow.read("S")[0].decode('UTF-8'))
            # final piece is an encoded Text object
            self.assertEqual(
                u"à Text object", flow.read("s")[0].decode('UTF-8')
            )

    def test_simulate_java_output_1(self):
        """Write the reference values in Python and compare with Java."""
        java_bytes = _get_java_output_stream(self.wd)
        py_bytes = StringIO()
        # integers
        for number in (42, 4242, 424242, 42424242, -42):
            srl.serialize_vint(number, py_bytes)
        # longs go through the same vint writer
        for number in (42, 424242, 4242424242):
            srl.serialize_vint(number, py_bytes)
        # strings: plain ASCII first, then one with accented characters
        for text in (u"hello world", u"oggi è giovedì"):
            wu.writeString(py_bytes, text)
        # the last item is written with serialize_text
        srl.serialize_text(u"à Text object", py_bytes)
        self.assertEqual(java_bytes.getvalue(), py_bytes.getvalue())

    def srl_helper(self, data, rule=None):
        """Write ``data`` through ``srl.FlowWriter`` and return the bytes.

        When ``rule`` is ``None`` one is derived from ``data``: ``'L'`` for
        every ``int`` item and ``'S'`` for everything else.
        """
        dump_path = os.path.join(self.wd, 'foo.dat')
        with srl.FlowWriter(open(dump_path, 'wb')) as flow:
            if rule is None:
                codes = []
                for item in data:
                    codes.append('L' if isinstance(item, int) else 'S')
                rule = ''.join(codes)
            flow.write(rule.encode('UTF-8'), data)
        with open(dump_path, 'rb') as source:
            payload = source.read()
        return payload

    def wu_helper(self, data):
        """Serialize ``data`` item by item and return the resulting bytes.

        ``int`` items go through ``srl.serialize_vint`` and ``basestring``
        items through ``wu.writeString``; anything else is skipped.
        """
        sink = StringIO()
        for item in data:
            if isinstance(item, int):
                srl.serialize_vint(item, sink)
                continue
            if isinstance(item, basestring):
                wu.writeString(sink, item)
        return sink.getvalue()

    def test_write_equiv(self):
        """``srl_helper`` and ``wu_helper`` must produce the same bytes."""
        data = (42, 4242, 424242, 42424242, -42,
                42, 424242, 42424242424242,
                u"hello world", u"oggi è giovedì", u"à Text object")
        via_flow = self.srl_helper(data)
        via_wu = self.wu_helper(data)
        self.assertEqual(via_flow, via_wu)

    def test_simulate_java_output_2(self):
        """``FlowWriter`` with an explicit rule must match the Java bytes."""
        data = (42, 4242, 424242, 42424242, -42,
                42, 424242, 4242424242,
                u"hello world", u"oggi è giovedì", u"à Text object")
        rule = "iiiiiLLLSSs"
        from_java = _get_java_output_stream(self.wd).getvalue()
        from_python = self.srl_helper(data, rule)
        self.assertEqual(from_java, from_python)

    def test_serialize_old_style_filename(self):
        """An old-style filename must survive a write/read round trip."""
        original = 'some_filename.file'
        srl.serialize_old_style_filename(original, self.stream)
        # Rewind so the reader starts at the first byte written.
        self.stream.seek(0)
        restored = srl.deserialize_old_style_filename(self.stream)
        self.assertEqual(original, restored)
예제 #10
0
def get_java_output_stream(jclass, classpath, args, wd):
    """Run a Java class and return its standard output as a stream.

    :param jclass: name of the Java class to run.
    :param classpath: value passed to ``java -cp``.
    :param args: list of extra command-line arguments appended after the
        class name.
    :param wd: working directory for the Java process.
    :return: a ``StringIO`` wrapping everything the process wrote to
        standard output.
    :raises subprocess.CalledProcessError: if the process exits with a
        non-zero status.
    """
    # Java's stderr is discarded; the handle is opened in a ``with`` block so
    # it is closed even when check_output raises (it used to be leaked).
    with open('/dev/null', 'w') as devnull:
        output = subprocess.check_output(
            [JAVA, '-cp', classpath, jclass] + args,
            cwd=wd,
            stderr=devnull)
    return StringIO(output)
예제 #11
0
 def serialize(self, record):
     """Return ``record`` encoded by ``self.datum_writer``.

     The record is written through a ``BinaryEncoder`` into an in-memory
     buffer, whose contents are returned.
     """
     buf = StringIO()
     self.datum_writer.write(record, BinaryEncoder(buf))
     encoded = buf.getvalue()
     return encoded
예제 #12
0
 def deserialize(self, rec_bytes):
     """Decode ``rec_bytes`` with ``self.reader`` and return the result.

     The bytes are wrapped in an in-memory stream and handed to the reader
     through a ``BinaryDecoder``.
     """
     read = self.reader.read
     decoder = BinaryDecoder(StringIO(rec_bytes))
     return read(decoder)
예제 #13
0
파일: simulator.py 프로젝트: crs4/pydoop
def serialize_long_to_string(v):
    """Return the string that ``serialize_long`` writes for ``v``."""
    buf = StringIO()
    serialize_long(v, buf)
    encoded = buf.getvalue()
    return encoded
예제 #14
0
 def setUp(self):
     """Give the test an empty in-memory stream and a fresh scratch dir.

     The directory is created with ``tempfile.mkdtemp`` using the
     ``pydoop_`` prefix and stored in ``self.wd``.
     """
     self.stream = StringIO()
     scratch_dir = tempfile.mkdtemp(prefix="pydoop_")
     self.wd = scratch_dir
예제 #15
0
class TestSerialize(unittest.TestCase):
    """Checks for the ``srl`` and ``wu`` serialization helpers.

    Most tests use the bytes returned by ``_get_java_output_stream`` as the
    reference: five ints, three longs, two strings and one Text object.
    """

    def setUp(self):
        # Each test starts with an empty stream and its own scratch directory.
        self.stream = StringIO()
        self.wd = tempfile.mkdtemp(prefix="pydoop_")

    def tearDown(self):
        # Remove the scratch directory created in setUp.
        shutil.rmtree(self.wd)

    def test_deserializing_java_output_1(self):
        """Decode the Java output with the ``wu`` readers."""
        java_bytes = _get_java_output_stream(self.wd)
        # integers
        for expected in (42, 4242, 424242, 42424242, -42):
            self.assertEqual(expected, wu.readVInt(java_bytes))
        # longs
        for expected in (42, 424242, 4242424242):
            self.assertEqual(expected, wu.readVLong(java_bytes))
        # strings: plain ASCII first, then one with accented characters
        for expected in (u"hello world", u"oggi è giovedì"):
            self.assertEqual(expected, wu.readString(java_bytes))
        # final piece is an encoded Text object
        self.assertEqual(u"à Text object", srl.deserialize_text(java_bytes))

    def test_deserializing_java_output_2(self):
        """Decode the Java output, reading numbers with ``srl``."""
        java_bytes = _get_java_output_stream(self.wd)
        # integers
        for expected in (42, 4242, 424242, 42424242, -42):
            self.assertEqual(expected, srl.deserialize_vint(java_bytes))
        # longs are read with the same vint reader
        for expected in (42, 424242, 4242424242):
            self.assertEqual(expected, srl.deserialize_vint(java_bytes))
        # strings: plain ASCII first, then one with accented characters
        for expected in (u"hello world", u"oggi è giovedì"):
            self.assertEqual(expected, wu.readString(java_bytes))
        # final piece is an encoded Text object
        self.assertEqual(u"à Text object", srl.deserialize_text(java_bytes))

    def test_deserializing_java_output_3(self):
        """Dump the Java output to a file and decode it via ``FlowReader``."""
        java_bytes = _get_java_output_stream(self.wd)
        dump_path = os.path.join(self.wd, 'foo.dat')
        with open(dump_path, 'wb') as sink:
            sink.write(java_bytes.getvalue())
        with srl.FlowReader(open(dump_path, 'rb')) as flow:
            # integers
            for expected in (42, 4242, 424242, 42424242, -42):
                self.assertEqual(expected, flow.read("i")[0])
            # longs
            for expected in (42, 424242, 4242424242):
                self.assertEqual(expected, flow.read("L")[0])
            # strings: plain ASCII first, then one with accented characters
            for expected in (u"hello world", u"oggi è giovedì"):
                self.assertEqual(expected, flow.read("S")[0].decode('UTF-8'))
            # final piece is an encoded Text object
            self.assertEqual(
                u"à Text object", flow.read("s")[0].decode('UTF-8')
            )

    def test_simulate_java_output_1(self):
        """Write the reference values in Python and compare with Java."""
        java_bytes = _get_java_output_stream(self.wd)
        py_bytes = StringIO()
        # integers
        for number in (42, 4242, 424242, 42424242, -42):
            srl.serialize_vint(number, py_bytes)
        # longs go through the same vint writer
        for number in (42, 424242, 4242424242):
            srl.serialize_vint(number, py_bytes)
        # strings: plain ASCII first, then one with accented characters
        for text in (u"hello world", u"oggi è giovedì"):
            wu.writeString(py_bytes, text)
        # the last item is written with serialize_text
        srl.serialize_text(u"à Text object", py_bytes)
        self.assertEqual(java_bytes.getvalue(), py_bytes.getvalue())

    def srl_helper(self, data, rule=None):
        """Write ``data`` through ``srl.FlowWriter`` and return the bytes.

        When ``rule`` is ``None`` one is derived from ``data``: ``'L'`` for
        every ``int`` item and ``'S'`` for everything else.
        """
        dump_path = os.path.join(self.wd, 'foo.dat')
        with srl.FlowWriter(open(dump_path, 'wb')) as flow:
            if rule is None:
                codes = []
                for item in data:
                    codes.append('L' if isinstance(item, int) else 'S')
                rule = ''.join(codes)
            flow.write(rule.encode('UTF-8'), data)
        with open(dump_path, 'rb') as source:
            payload = source.read()
        return payload

    def wu_helper(self, data):
        """Serialize ``data`` item by item and return the resulting bytes.

        ``int`` items go through ``srl.serialize_vint`` and ``basestring``
        items through ``wu.writeString``; anything else is skipped.
        """
        sink = StringIO()
        for item in data:
            if isinstance(item, int):
                srl.serialize_vint(item, sink)
                continue
            if isinstance(item, basestring):
                wu.writeString(sink, item)
        return sink.getvalue()

    def test_write_equiv(self):
        """``srl_helper`` and ``wu_helper`` must produce the same bytes."""
        data = (42, 4242, 424242, 42424242, -42,
                42, 424242, 42424242424242,
                u"hello world", u"oggi è giovedì", u"à Text object")
        via_flow = self.srl_helper(data)
        via_wu = self.wu_helper(data)
        self.assertEqual(via_flow, via_wu)

    def test_simulate_java_output_2(self):
        """``FlowWriter`` with an explicit rule must match the Java bytes."""
        data = (42, 4242, 424242, 42424242, -42,
                42, 424242, 4242424242,
                u"hello world", u"oggi è giovedì", u"à Text object")
        rule = "iiiiiLLLSSs"
        from_java = _get_java_output_stream(self.wd).getvalue()
        from_python = self.srl_helper(data, rule)
        self.assertEqual(from_java, from_python)

    def test_serialize_old_style_filename(self):
        """An old-style filename must survive a write/read round trip."""
        original = 'some_filename.file'
        srl.serialize_old_style_filename(original, self.stream)
        # Rewind so the reader starts at the first byte written.
        self.stream.seek(0)
        restored = srl.deserialize_old_style_filename(self.stream)
        self.assertEqual(original, restored)