def main():
    """Run ``./avro_pyrw.py`` on ``./users.avro`` via HadoopSimulatorNetwork.

    The input split handed to the simulator is built from the resolved
    path of the Avro file, offset 0 and a length equal to the file's size
    in bytes. The resolved current directory is passed as the task output
    directory.
    """
    avro_file = os.path.realpath('./users.avro')
    split = InputSplit.to_string(
        'file://' + avro_file, 0, os.stat(avro_file).st_size
    )
    job_conf = {
        "mapreduce.task.partition": "0",
        "mapreduce.task.output.dir": 'file://%s' % os.path.realpath('.'),
    }
    simulator = HadoopSimulatorNetwork(program='./avro_pyrw.py')
    # The first two positional arguments of run() are deliberately None;
    # only the configuration and the input split are supplied.
    simulator.run(None, None, job_conf, input_split=split)
def main():
    """Simulate a run of ``./avro_pyrw.py`` over ``./users.avro``.

    Both paths are hard-coded relative to the current directory. The
    input split uses offset 0 and the file's size in bytes as its length;
    the task output directory is the resolved current directory.
    """
    script = './avro_pyrw.py'
    resolved_input = os.path.realpath('./users.avro')
    n_bytes = os.stat(resolved_input).st_size
    whole_file_split = InputSplit.to_string('file://'+resolved_input, 0, n_bytes)

    settings = {"mapreduce.task.partition": "0"}
    settings["mapreduce.task.output.dir"] = 'file://%s' % os.path.realpath('.')

    # run() receives None for its first two positional arguments.
    HadoopSimulatorNetwork(program=script).run(
        None, None, settings, input_split=whole_file_split
    )
def create_configuration():
    """Assemble the paths, job configuration and input split for "wordcount".

    A new temporary directory (prefix ``pydoop_``) is created on every call
    and is not removed here; cleaning it up is left to the caller.

    Returns:
        A 5-tuple ``(data_in, data_out, conf, input_split, output_dir)``:
        the path of ``alice_1.txt`` under ``EXAMPLES_DIR/input``, the fixed
        name ``'results.txt'``, the configuration dict, the serialized input
        split (offset 0, length equal to the input file's size) and the path
        of the temporary output directory.
    """
    text_path = os.path.join(EXAMPLES_DIR, 'input', 'alice_1.txt')
    # Stat the input before creating the temp dir so that a missing input
    # file does not leave a stray directory behind.
    text_size = os.stat(text_path).st_size
    scratch_dir = tempfile.mkdtemp(prefix="pydoop_")

    job_conf = {
        "mapred.map.tasks": "2",
        "mapred.reduce.tasks": "1",
        "mapred.job.name": "wordcount",
        "mapred.work.output.dir": 'file://%s' % scratch_dir,
        "mapred.task.partition": "0",
    }
    split = InputSplit.to_string('file://%s' % text_path, 0, text_size)
    return text_path, 'results.txt', job_conf, split, scratch_dir
def main(argv):
    """Run a copy of ``./avro_pyrw.py`` on the Avro file named in ``argv[1]``.

    Before running, ``../schemas/stats.avsc`` is copied into the current
    directory as ``stats.avsc`` and the script is passed through
    ``cp_script``, whose return value is used as the program to simulate.

    Args:
        argv: Command-line style argument sequence; ``argv[0]`` is used in
            the usage message and ``argv[1]`` is the Avro input file.

    Raises:
        SystemExit: With a usage message when ``argv[1]`` is missing.
    """
    try:
        avro_arg = argv[1]
    except IndexError:
        sys.exit("Usage: python %s AVRO_FILE" % argv[0])

    shutil.copy('../schemas/stats.avsc', 'stats.avsc')
    script = cp_script('./avro_pyrw.py')

    avro_file = os.path.realpath(avro_arg)
    # Split starts at offset 0 and its length is the file's size in bytes.
    split = InputSplit.to_string(
        'file://' + avro_file, 0, os.stat(avro_file).st_size
    )
    job_conf = {
        "mapreduce.task.partition": "0",
        "mapreduce.task.output.dir": 'file://%s' % os.path.realpath('.'),
    }
    simulator = HadoopSimulatorNetwork(program=script)
    simulator.run(None, None, job_conf, input_split=split)
def get_areader(offset, length):
    """Return an ``AvroReader`` built on a split of ``url``.

    ``url`` is not a parameter: it is looked up in the surrounding scope
    when this function is called.

    Args:
        offset: Second argument to ``InputSplit.to_string`` (presumably the
            split's starting position; confirm against InputSplit).
        length: Third argument to ``InputSplit.to_string`` (presumably the
            split's size; confirm against InputSplit).

    Returns:
        An ``AvroReader`` constructed from a ``FunkyCtx`` wrapping the split.
    """
    # Serialize the split description, then parse it back into an InputSplit.
    serialized = InputSplit.to_string(url, offset, length)
    return AvroReader(FunkyCtx(InputSplit(serialized)))