Ejemplo n.º 1
0
def op_reduce(job):
        """Execute the reduce task described by ``job`` and announce its output.

        Keys read from ``job``:
          * ``sort``, ``mem_sort_limit`` -- converted with ``int`` and handed
            to ``ReduceReader``.
          * ``required_modules`` -- whitespace-separated module names that
            are imported and placed into the reduce function's globals.
          * ``ext_reduce`` (optionally ``ext_params``) -- when present, the
            external reducer is prepared and used.
          * ``reduce`` and ``params`` -- otherwise, the marshalled code
            object and the pickled parameters of the reducer.

        Nothing is returned; the partition number and the output address are
        emitted through ``msg(..., "OUT")``.
        """
        global job_name

        inputs = this_inputs()
        msg("Received a new reduce job!")

        sort_flag = int(job['sort'])
        sort_mem_limit = int(job['mem_sort_limit'])
        module_names = job['required_modules'].split()

        if 'ext_reduce' not in job:
                # Reducer shipped as marshalled code plus pickled parameters.
                fun_reduce.func_code = marshal.loads(job['reduce'])
                red_params = cPickle.loads(job['params'])
        else:
                # External reducer; "0\n" is used when no parameters are given.
                red_params = job['ext_params'] if "ext_params" in job else "0\n"
                external.prepare(job['ext_reduce'], red_params,
                        EXT_REDUCE % job_name)
                fun_reduce.func_code = external.ext_reduce.func_code

        # Make every required module visible by name inside the reducer,
        # without overriding names that are already defined there.
        reduce_globals = fun_reduce.func_globals
        for module_name in module_names:
                reduce_globals.setdefault(module_name, __import__(module_name))

        reader = ReduceReader(inputs, sort_flag, sort_mem_limit)
        writer = ReduceOutput()

        msg("Starting reduce")
        fun_reduce(reader.iter(), writer, red_params)
        msg("Reduce done")

        writer.close()
        external.close_ext()

        result_line = "%d %s" % (this_partition(), writer.disco_address())
        msg(result_line, "OUT")
Ejemplo n.º 2
0
def op_map(job):
        """Execute the map task described by ``job`` and announce its output.

        Keys read from ``job``:
          * ``nr_reduces`` -- number of ``MapOutput`` partitions to create.
          * ``required_modules`` -- whitespace-separated module names made
            available to every user-supplied function loaded here.
          * ``map_reader``, ``map_writer``, ``partition`` -- marshalled code
            objects.
          * ``ext_map`` (optionally ``ext_params``) -- when present, the
            external mapper is prepared and used; otherwise ``map``
            (marshalled code) and ``params`` (pickled value) are loaded.
          * ``map_init``, ``combiner`` -- optional marshalled code objects.
          * ``chunked`` -- when present, partitions are merged and a
            ``chunk://`` address is reported instead of the address of
            partition 0.

        Nothing is returned; the partition number and the output address are
        emitted through ``msg(..., "OUT")``.
        """
        global job_name

        job_input = this_inputs()
        msg("Received a new map job!")

        # NOTE(review): err() presumably aborts the task; if it returns,
        # execution continues with job_input[0] below -- confirm.
        if len(job_input) != 1:
                err("Map can only handle one input. Got: %s" %
                        " ".join(job_input))

        nr_reduces = int(job['nr_reduces'])
        required_modules = job['required_modules'].split()

        def inject_modules(*funs):
                # Bind each required module by name in the globals of the
                # given functions; setdefault keeps existing definitions.
                for m in required_modules:
                        mod = __import__(m)
                        for fun in funs:
                                fun.func_globals.setdefault(m, mod)

        # NOTE(review): marshal.loads / cPickle.loads execute on data taken
        # from the job description, which must come from a trusted source.
        fun_map_reader.func_code = marshal.loads(job['map_reader'])
        fun_map_writer.func_code = marshal.loads(job['map_writer'])
        fun_partition.func_code = marshal.loads(job['partition'])
        # The writer is included so that a user-supplied map_writer can use
        # the required modules just like the reader and the partitioner.
        inject_modules(fun_map_reader, fun_map_writer, fun_partition)

        if 'ext_map' in job:
                # External mapper; "0\n" is used when no parameters are given.
                if 'ext_params' in job:
                        map_params = job['ext_params']
                else:
                        map_params = "0\n"
                external.prepare(job['ext_map'],
                        map_params, EXT_MAP % job_name)
                fun_map.func_code = external.ext_map.func_code
        else:
                map_params = cPickle.loads(job['params'])
                fun_map.func_code = marshal.loads(job['map'])

        inject_modules(fun_map)

        if 'map_init' in job:
                fun_init.func_code = marshal.loads(job['map_init'])
                # A user-supplied init function needs the required modules too.
                inject_modules(fun_init)

        if 'combiner' in job:
                fun_combiner.func_code = marshal.loads(job['combiner'])
                inject_modules(fun_combiner)
                partitions = [MapOutput(i, map_params, fun_combiner)
                        for i in range(nr_reduces)]
        else:
                partitions = [MapOutput(i, map_params)
                        for i in range(nr_reduces)]

        run_map(job_input[0], partitions, map_params)
        for p in partitions:
                p.close()

        if 'chunked' in job:
                merge_chunks(partitions)
                out = "chunk://%s/%s/map-chunk-%d" %\
                        (this_host(), job_name, this_partition())
        else:
                out = partitions[0].disco_address()

        external.close_ext()
        msg("%d %s" % (this_partition(), out), "OUT")
Ejemplo n.º 3
0
def op_reduce(job):
        """Execute the reduce task described by ``job`` and publish its index.

        Keys read from ``job``:
          * ``sort``, ``mem_sort_limit`` -- converted with ``int`` and handed
            to ``ReduceReader``.
          * ``required_modules`` -- whitespace-separated module names passed
            to ``import_modules`` together with the reduce-side functions.
          * ``reduce_reader``, ``reduce_writer`` -- marshalled code objects.
          * ``reduce_init`` -- optional marshalled code for ``fun_init``.
          * ``required_files`` -- optional marshalled value written out with
            ``write_files`` into ``REQ_FILES``, which is then put first on
            ``sys.path``.
          * ``ext_reduce`` (optionally ``ext_params``) -- when present, the
            external reducer is prepared and used; otherwise ``reduce``
            (marshalled code) and ``params`` (pickled value) are loaded.

        Nothing is returned; the output file's base name is appended to
        ``REDUCE_INDEX`` and a ``dir://`` address is emitted through
        ``msg(..., "OUT")``.
        """
        inputs = this_inputs()
        msg("Received a new reduce job!")

        sort_flag = int(job['sort'])
        sort_mem_limit = int(job['mem_sort_limit'])
        module_names = job['required_modules'].split()

        if 'reduce_init' in job:
                fun_init.func_code = marshal.loads(job['reduce_init'])

        fun_reduce_reader.func_code = marshal.loads(job['reduce_reader'])
        fun_reduce_writer.func_code = marshal.loads(job['reduce_writer'])

        if 'required_files' in job:
                shipped_files = marshal.loads(job['required_files'])
                write_files(shipped_files, REQ_FILES)
                # Shipped files take precedence over anything else importable.
                sys.path.insert(0, REQ_FILES)

        reduce_side_funs = [fun_reduce_reader, fun_reduce_writer,
                fun_reduce, fun_init]
        import_modules(module_names, reduce_side_funs)

        if 'ext_reduce' not in job:
                # Reducer shipped as marshalled code plus pickled parameters.
                fun_reduce.func_code = marshal.loads(job['reduce'])
                red_params = cPickle.loads(job['params'])
        else:
                # External reducer; "0\n" is used when no parameters are given.
                red_params = job['ext_params'] if "ext_params" in job else "0\n"
                external.prepare(job['ext_reduce'], red_params, EXT_REDUCE)
                fun_reduce.func_code = external.ext_reduce.func_code

        reader = ReduceReader(inputs, sort_flag, sort_mem_limit)
        entries = reader.iter()
        writer = ReduceOutput(red_params)

        msg("Starting reduce")
        # fun_init runs whether or not the job supplied 'reduce_init'.
        fun_init(entries, red_params)
        fun_reduce(entries, writer, red_params)
        msg("Reduce done")

        writer.close()
        external.close_ext()

        # Record this task's output file in the shared reduce index.
        index_line = os.path.basename(writer.fname) + "\n"
        safe_append(cStringIO.StringIO(index_line), REDUCE_INDEX)

        address = "dir://%s/%sreduce-index.txt" % (this_host(), JOB_HOME)
        msg(address, "OUT")
Ejemplo n.º 4
0
def op_map(job):
        """Execute the map task described by ``job`` and publish its output.

        Keys read from ``job``:
          * ``nr_reduces`` -- converted with ``int``; at least one
            ``MapOutput`` partition is always created.
          * ``map_reader``, ``map_writer``, ``partition`` -- marshalled code
            objects.
          * ``map_init``, ``combiner`` -- optional marshalled code objects.
          * ``required_files`` -- optional marshalled value written out with
            ``write_files`` into ``REQ_FILES``, which is then put first on
            ``sys.path``.
          * ``required_modules`` -- whitespace-separated module names passed
            to ``import_modules`` together with the map-side functions.
          * ``ext_map`` (optionally ``ext_params``) -- when present, the
            external mapper is prepared and used; otherwise ``map``
            (marshalled code) and ``params`` (pickled value) are loaded.

        Nothing is returned. With a non-zero ``nr_reduces`` the partitions
        are merged and a partition address is emitted; otherwise the output
        file names are appended to ``MAP_INDEX`` and the index address is
        emitted. Both go through ``msg(..., "OUT")``.
        """
        inputs = this_inputs()
        msg("Received a new map job!")

        # NOTE(review): err() presumably aborts the task; if it returns,
        # execution continues with inputs[0] below -- confirm.
        if len(inputs) != 1:
                err("Map can only handle one input. Got: %s" %
                        " ".join(inputs))

        nr_reduces = int(job['nr_reduces'])
        partition_count = max(1, nr_reduces)

        fun_map_reader.func_code = marshal.loads(job['map_reader'])
        fun_map_writer.func_code = marshal.loads(job['map_writer'])
        fun_partition.func_code = marshal.loads(job['partition'])

        if 'map_init' in job:
                fun_init.func_code = marshal.loads(job['map_init'])

        if 'required_files' in job:
                shipped_files = marshal.loads(job['required_files'])
                write_files(shipped_files, REQ_FILES)
                # Shipped files take precedence over anything else importable.
                sys.path.insert(0, REQ_FILES)

        module_names = job['required_modules'].split()
        map_side_funs = [fun_map_reader, fun_map_writer,
                fun_partition, fun_map, fun_combiner, fun_init]
        import_modules(module_names, map_side_funs)

        if 'ext_map' not in job:
                # Mapper shipped as pickled parameters plus marshalled code.
                map_params = cPickle.loads(job['params'])
                fun_map.func_code = marshal.loads(job['map'])
        else:
                # External mapper; "0\n" is used when no parameters are given.
                map_params = job['ext_params'] if 'ext_params' in job else "0\n"
                external.prepare(job['ext_map'], map_params, EXT_MAP)
                fun_map.func_code = external.ext_map.func_code

        # The combiner, when supplied, is passed as an extra MapOutput argument.
        if 'combiner' in job:
                fun_combiner.func_code = marshal.loads(job['combiner'])
                extra_args = (fun_combiner,)
        else:
                extra_args = ()
        partitions = [MapOutput(idx, map_params, *extra_args)
                for idx in range(partition_count)]

        run_map(inputs[0], partitions, map_params)
        external.close_ext()

        for part in partitions:
                part.close()

        if not nr_reduces:
                # No reduce phase: list every output file in the map index.
                file_names = [os.path.basename(part.fname) for part in partitions]
                index = cStringIO.StringIO("\n".join(file_names) + "\n")
                safe_append(index, MAP_INDEX)
                address = "dir://%s/%smap-index.txt" % (this_host(), JOB_HOME)
                msg(address, "OUT")
        else:
                merge_partitions(partitions)
                first_name = os.path.basename(PART_OUTPUT % 0)
                last_index = len(partitions) - 1
                address = "dir://%s/%s%s:%d" % (this_host(), JOB_HOME,
                        first_name, last_index)
                msg(address, "OUT")