Example #1
import os

# Import paths below are assumed from pydoop's simulator API.
from pydoop.mapreduce.pipes import InputSplit
from pydoop.mapreduce.simulator import HadoopSimulatorNetwork


def main():
    program_name = './avro_pyrw.py'
    data_in = './users.avro'
    path = os.path.realpath(data_in)
    length = os.stat(path).st_size
    # Serialize a single split covering the whole file: (URI, offset, length).
    input_split = InputSplit.to_string('file://' + path, 0, length)
    out_path = os.path.realpath('.')
    conf = {
        "mapreduce.task.partition": "0",
        "mapreduce.task.output.dir": 'file://%s' % out_path,
    }
    # Run the pipes program through the network-based Hadoop simulator.
    hsn = HadoopSimulatorNetwork(program=program_name)
    hsn.run(None, None, conf, input_split=input_split)
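The avro_pyrw.py script launched above is not shown on this page. For orientation, a minimal pydoop pipes program has roughly the following shape; this is only a sketch (the identity map is a placeholder, not the real Avro read/write logic):

import pydoop.mapreduce.api as api
import pydoop.mapreduce.pipes as pipes


class Mapper(api.Mapper):

    def map(self, context):
        # Placeholder logic: pass each record through unchanged.
        context.emit(context.key, context.value)


def __main__():
    # Entry point expected by the pipes runtime.
    pipes.run_task(pipes.Factory(Mapper))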
Example #2
File: run.py  Project: crs4/pydoop
import os
import tempfile

# Import path assumed from pydoop; EXAMPLES_DIR is a constant defined
# elsewhere in run.py, pointing at the pydoop examples tree.
from pydoop.mapreduce.pipes import InputSplit


def create_configuration():
    data_in = os.path.join(EXAMPLES_DIR, 'input', 'alice_1.txt')
    data_out = 'results.txt'
    data_in_uri = 'file://%s' % data_in
    data_in_size = os.stat(data_in).st_size
    output_dir = tempfile.mkdtemp(prefix="pydoop_")
    output_dir_uri = 'file://%s' % output_dir
    # Note: this example uses the old-style "mapred.*" property names.
    conf = {
        "mapred.map.tasks": "2",
        "mapred.reduce.tasks": "1",
        "mapred.job.name": "wordcount",
        "mapred.work.output.dir": output_dir_uri,
        "mapred.task.partition": "0",
    }
    # One split covering the whole input file.
    input_split = InputSplit.to_string(data_in_uri, 0, data_in_size)
    return data_in, data_out, conf, input_split, output_dir
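For context, a driver could consume the returned tuple along these lines; the './wordcount_minimal.py' script name is an assumption, and the run call mirrors the signature used in Example #1:

from pydoop.mapreduce.simulator import HadoopSimulatorNetwork


def main():
    data_in, data_out, conf, input_split, output_dir = create_configuration()
    # './wordcount_minimal.py' is a hypothetical pipes script name.
    hsn = HadoopSimulatorNetwork(program='./wordcount_minimal.py')
    hsn.run(None, None, conf, input_split=input_split)
    print('results written under %s' % output_dir)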
Example #3
import os
import shutil
import sys

# Import paths assumed from pydoop's simulator API; cp_script is a
# helper defined elsewhere in this example (it stages a script and
# returns the path of the copy).
from pydoop.mapreduce.pipes import InputSplit
from pydoop.mapreduce.simulator import HadoopSimulatorNetwork


def main(argv):
    try:
        data_in = argv[1]
    except IndexError:
        sys.exit("Usage: python %s AVRO_FILE" % argv[0])
    # Make the Avro schema available in the working directory.
    shutil.copy('../schemas/stats.avsc', 'stats.avsc')
    program_name = cp_script('./avro_pyrw.py')
    path = os.path.realpath(data_in)
    length = os.stat(path).st_size
    input_split = InputSplit.to_string('file://' + path, 0, length)
    out_path = os.path.realpath('.')
    conf = {
        "mapreduce.task.partition": "0",
        "mapreduce.task.output.dir": 'file://%s' % out_path,
    }
    hsn = HadoopSimulatorNetwork(program=program_name)
    hsn.run(None, None, conf, input_split=input_split)
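cp_script is defined elsewhere in the original example; a plausible shape for it, offered only as a hypothetical sketch, is:

import os
import shutil
import stat


def cp_script(script):
    # Hypothetical: copy the script into the working directory and make
    # sure the simulator can execute it.
    dest = os.path.join(os.getcwd(), os.path.basename(script))
    shutil.copy(script, dest)
    os.chmod(dest, os.stat(dest).st_mode | stat.S_IXUSR)
    return dest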
Example #4
# InputSplit, AvroReader, FunkyCtx and url are defined in the enclosing
# module; this closure builds an Avro reader over a given byte range.
def get_areader(offset, length):
    isplit = InputSplit(InputSplit.to_string(url, offset, length))
    ctx = FunkyCtx(isplit)
    return AvroReader(ctx)
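FunkyCtx is not shown here; assuming AvroReader only needs an input_split attribute from its context, a minimal stand-in could look like:

class FunkyCtx(object):
    # Hypothetical minimal context: exposes just the attribute that
    # AvroReader is assumed to read.
    def __init__(self, input_split):
        self.input_split = input_split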