def test_write(N, fname):
    """Write ``N`` identical ``mapItem`` records to ``fname`` via a coroutine.

    A generator-based coroutine receives ``(key, value)`` pairs through
    ``send()`` and forwards each pair to a ``BinaryWriter`` wrapping the
    output file.

    :param N: number of ``("key", "val")`` items to push through the
        coroutine.
    :param fname: path of the file to create (truncated if it exists).
    """
    # Binary mode: a text-mode handle would apply newline translation (and,
    # on Python 3, str encoding) to whatever the writer emits.
    with open(fname, 'wb') as f:
        writer = BinaryWriter(f)

        def foo():
            # Bind the method once so the loop skips the attribute lookup.
            writer_send = writer.send
            while True:
                key, val = yield True
                writer_send("mapItem", key, val)

        foo_fg = foo()
        # Prime the coroutine so it is suspended at its first ``yield``.
        # The builtin next() works on Python 2.6+ and Python 3, unlike the
        # Python 2-only ``.next()`` method.
        next(foo_fg)
        for _ in range(N):
            foo_fg.send(("key", "val"))
def stream_writer(fname, data):
    """Replay a sequence of commands into ``fname`` through a ``BinaryWriter``.

    :param fname: path of the file to create (truncated if it exists).
    :param data: iterable of indexable, sliceable sequences; element 0 of
        each is the command and the remaining elements are passed to
        ``BinaryWriter.send`` as positional arguments.
    """
    # Binary mode: a text-mode handle would apply newline translation (and,
    # on Python 3, str encoding) to whatever the writer emits.
    with open(fname, 'wb') as f:
        bw = BinaryWriter(f)
        for vals in data:
            cmd, args = vals[0], vals[1:]
            bw.send(cmd, *args)
def write_data(N, fname):
    """Write ``N`` ``mapItem`` records followed by a ``close`` command.

    Every record carries the fixed pair ``("key", "val")``.

    :param N: number of ``mapItem`` records to emit before ``close``.
    :param fname: path of the file to create (truncated if it exists).
    """
    # Binary mode: a text-mode handle would apply newline translation (and,
    # on Python 3, str encoding) to whatever the writer emits.
    with open(fname, 'wb') as f:
        writer = BinaryWriter(f)
        for _ in range(N):
            writer.send('mapItem', "key", "val")
        writer.send('close')
def __write_cmd_file(self, mode):
    """Dump a map-task command stream for ``self.records`` and return its path.

    The stream is written through a ``BinaryWriter`` to the file named by
    ``self._mkfn('map_in')``. It contains, in order: a start message, the
    job configuration, a run-map request, the input types, one map item per
    record, and a close message.

    :param mode: ``'K'`` to send each serialized record as the key (with the
        literal value ``'v'``), or ``'V'`` to send it as the value (with the
        literal key ``'k'``).
    :returns: the path of the command file that was written.
    :raises RuntimeError: if ``mode`` is neither ``'K'`` nor ``'V'``.
    """
    if mode not in ('K', 'V'):  # FIXME: add support for 'KV'
        raise RuntimeError("Mode %r not supported" % (mode, ))

    records_are_keys = mode == 'K'
    if records_are_keys:
        schema_prop = AVRO_KEY_INPUT_SCHEMA
    else:
        schema_prop = AVRO_VALUE_INPUT_SCHEMA

    cmd_fn = self._mkfn('map_in')
    serializer = AvroSerializer(self.schema)

    with open(cmd_fn, 'wb') as out:
        bw = BinaryWriter(out)
        bw.send(bw.START_MESSAGE, 0)
        bw.send(
            bw.SET_JOB_CONF,
            AVRO_INPUT, mode,
            schema_prop, str(self.schema),
            'mapreduce.pipes.isjavarecordreader', 'true',
            'mapreduce.pipes.isjavarecordwriter', 'true',
        )
        bw.send(bw.RUN_MAP, 'input_split', 0, True)
        bw.send(bw.SET_INPUT_TYPES, 'key_type', 'value_type')
        for record in self.records:
            payload = serializer.serialize(record)
            if records_are_keys:
                bw.send(bw.MAP_ITEM, payload, 'v')
            else:
                bw.send(bw.MAP_ITEM, 'k', payload)
        bw.send(bw.CLOSE)
        # The writer is closed explicitly before the ``with`` block closes
        # the underlying file.
        bw.close()
    return cmd_fn
def __write_cmd_file(self, mode):
    """Dump a map-task command stream for ``self.records`` and return its path.

    The stream is written through a ``BinaryWriter`` to the file named by
    ``self._mkfn('map_in')``. It contains, in order: ``start``,
    ``setJobConf``, ``setInputTypes``, ``runMap``, one ``mapItem`` per
    record, and ``close``.

    :param mode: ``'K'`` to send each serialized record as the key (with the
        literal value ``'v'``), or ``'V'`` to send it as the value (with the
        literal key ``'k'``).
    :returns: the path of the command file that was written.
    :raises RuntimeError: if ``mode`` is neither ``'K'`` nor ``'V'``.
    """
    if mode != 'K' and mode != 'V':  # FIXME: add support for 'KV'
        raise RuntimeError("Mode %r not supported" % (mode,))
    schema_prop = pydoop.PROPERTIES[
        'AVRO_%s_INPUT_SCHEMA' % ('KEY' if mode == 'K' else 'VALUE')
    ]
    cmd_fn = self._mkfn('map_in')
    serializer = AvroSerializer(self.schema)
    # Binary mode: the stream carries serialized Avro records, which a
    # text-mode handle could corrupt through newline translation.
    with open(cmd_fn, 'wb') as f:
        bwriter = BinaryWriter(f)
        bwriter.send('start', 0)
        # The job configuration is passed as one tuple argument, as in the
        # original call. The stray trailing comma that used to follow this
        # statement (making it a discarded 1-tuple) has been removed.
        bwriter.send('setJobConf', (
            pydoop.PROPERTIES['AVRO_INPUT'], mode,
            schema_prop, str(self.schema)
        ))
        bwriter.send('setInputTypes', 'key_type', 'value_type')
        bwriter.send('runMap', 'input_split', 0, False)
        for r in self.records:
            if mode == 'K':
                bwriter.send('mapItem', serializer.serialize(r), 'v')
            else:
                bwriter.send('mapItem', 'k', serializer.serialize(r))
        bwriter.send('close')
    return cmd_fn