Example #1
    def map_function(self, job_name, chunk_size, input_file, no_of_reducers, start_index):
        """Read this worker's chunk of input_file and run the map phase for the requested job."""
        map_output = ""
        if job_name == Jobs.WORD_COUNT_JOB:
            values = splitter.read_chunk_by_word(input_file, start_index, chunk_size)
            engine = mapreduce.Engine(values, WordCountMap, WordCountReduce, mapreduce.Partition, no_of_reducers)
            Worker.engine = engine
            map_output = engine.map_phase()
        elif job_name == Jobs.SORTING_JOB:
            values = splitter.read_chunk_by_word(input_file, start_index, chunk_size)
            engine = mapreduce.Engine(values, SortingMap, SortingReduce, mapreduce.Partition, no_of_reducers)
            Worker.engine = engine
            map_output = engine.map_phase()
        elif job_name == Jobs.HAMMING_ENCODE_JOB:
            values = splitter.read_chunk(input_file, start_index, chunk_size)
            engine = hamming_mapreduce.Engine(values, HammingEncodingMap, HammingEncodingReduce, mapreduce.Partition,
                                              no_of_reducers, start_index)
            Worker.engine = engine
            map_output = engine.hamming_encode_map_phase()
        elif job_name == Jobs.HAMMING_DECODE_JOB:
            values = splitter.read_binary_chunk(input_file, start_index, chunk_size)
            engine = hamming_mapreduce.Engine(values, HammingDecodingMap, HammingDecodingReduce, mapreduce.Partition,
                                              no_of_reducers, start_index)
            Worker.engine = engine
            map_output = engine.hamming_decode_map_phase()
        elif job_name == Jobs.HAMMING_ERROR_JOB:
            values = splitter.read_binary_chunk(input_file, start_index, chunk_size)
            engine = hamming_mapreduce.Engine(values, HammingErrorMap, HammingErrorReduce, mapreduce.Partition,
                                              no_of_reducers, start_index)
            Worker.engine = engine
            map_output = engine.hamming_error_map_phase()
        elif job_name == Jobs.HAMMING_CHECK_JOB:
            values = splitter.read_binary_chunk(input_file, start_index, chunk_size)
            engine = hamming_mapreduce.Engine(values, HammingCheckMap, HammingCheckReduce, mapreduce.Partition,
                                              no_of_reducers, start_index)
            Worker.engine = engine
            map_output = engine.hamming_check_map_phase()
        elif job_name == Jobs.HAMMING_FIX_JOB:
            values = splitter.read_binary_chunk(input_file, start_index, chunk_size)
            engine = hamming_mapreduce.Engine(values, HammingFixMap, HammingFixReduce, mapreduce.Partition,
                                              no_of_reducers,
                                              start_index)
            Worker.engine = engine
            map_output = engine.hamming_fix_map_phase()
        else:
            print("Invalid job name: {0}".format(job_name))

        return map_output
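For context, here is a minimal sketch of how a worker might invoke this dispatcher. Worker, Jobs, and the parameter names come from the snippet above; the concrete values and the call site itself are assumptions, not part of the original example:

# Hypothetical call site: dispatch a word-count map task over the first
# chunk of an input file.
worker = Worker()
output = worker.map_function(
    job_name=Jobs.WORD_COUNT_JOB,  # any of the Jobs constants handled above
    chunk_size=4096,               # assumed chunk size in bytes
    input_file="input.txt",        # assumed input path
    no_of_reducers=4,              # number of partitions for the shuffle
    start_index=0,                 # byte offset of this worker's split
)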
Example #2
import errno
import os

# Project-local modules: the input splitter, job definitions, and the
# MapReduce engine used below.
import jobs
import mapreduce
import splitter

splits = splitter.split_file(input_file, chunk_size)
engine = None


def silentremove(filename):
    """Remove filename, ignoring the error if it does not exist."""
    try:
        os.remove(filename)
    except OSError as e:
        if e.errno != errno.ENOENT:  # ENOENT: no such file or directory
            raise  # re-raise if removal failed for any other reason
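# Note: on Python 3.4+ this helper could be written with
# contextlib.suppress(FileNotFoundError); the explicit errno check above
# keeps it compatible with Python 2.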


# Map Phase: run the map function over each chunk of the input file.
for start_index in splits:
    # Word-oriented jobs split chunks on word boundaries; other job types
    # read raw byte ranges.
    if job_name == jobs.Jobs.SORTING_JOB or job_name == jobs.Jobs.WORD_COUNT_JOB:
        values = splitter.read_chunk_by_word(input_file, start_index, chunk_size)
    else:
        values = splitter.read_chunk(input_file, start_index, chunk_size)

    if job_name == jobs.Jobs.WORD_COUNT_JOB:
        engine = mapreduce.Engine(values, jobs.WordCountMap, jobs.WordCountReduce, mapreduce.Partition, no_of_reducers)
    elif job_name == jobs.Jobs.SORTING_JOB:
        engine = mapreduce.Engine(values, jobs.SortingMap, jobs.SortingReduce, mapreduce.Partition, no_of_reducers)
    else:
        # Without this guard, engine would be None (or stale) for an
        # unsupported job and engine.map_phase() would fail obscurely.
        raise ValueError("Unsupported job name: {0}".format(job_name))

    map_output = engine.map_phase()
    completed_jobs[start_index] = map_output
    print("------------- Completed Map Job_ID: {0} --------------".format(start_index))

# Remove any stale output from a previous run before appending results.
silentremove(output_file)

# Shuffle and Reduce Phase
for reducer_id in reducer_list:
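The listing is cut off at this loop. Purely as an illustration of what the shuffle-and-reduce step might look like — the Engine API beyond map_phase() is not shown in this excerpt, so the reduce_phase method, its signature, and the output format below are assumptions rather than the project's actual code:

# Hypothetical sketch only (assumed Engine API).
for reducer_id in reducer_list:
    # Each reducer would consume its partition of the shuffled map output.
    reduce_output = engine.reduce_phase(reducer_id)  # assumed method name
    with open(output_file, "a") as out:
        for key, value in reduce_output:
            out.write("{0}\t{1}\n".format(key, value))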