def map_function(self, job_name, chunk_size, input_file, no_of_reducers, start_index): map_output = "" if job_name == Jobs.WORD_COUNT_JOB: values = splitter.read_chunk_by_word(input_file, start_index, chunk_size) engine = mapreduce.Engine(values, WordCountMap, WordCountReduce, mapreduce.Partition, no_of_reducers) Worker.engine = engine map_output = engine.map_phase() elif job_name == Jobs.SORTING_JOB: values = splitter.read_chunk_by_word(input_file, start_index, chunk_size) engine = mapreduce.Engine(values, SortingMap, SortingReduce, mapreduce.Partition, no_of_reducers) Worker.engine = engine map_output = engine.map_phase() elif job_name == Jobs.HAMMING_ENCODE_JOB: values = splitter.read_chunk(input_file, start_index, chunk_size) engine = hamming_mapreduce.Engine(values, HammingEncodingMap, HammingEncodingReduce, mapreduce.Partition, no_of_reducers, start_index) Worker.engine = engine map_output = engine.hamming_encode_map_phase() elif job_name == Jobs.HAMMING_DECODE_JOB: values = splitter.read_binary_chunk(input_file, start_index, chunk_size) engine = hamming_mapreduce.Engine(values, HammingDecodingMap, HammingDecodingReduce, mapreduce.Partition, no_of_reducers, start_index) Worker.engine = engine map_output = engine.hamming_decode_map_phase() elif job_name == Jobs.HAMMING_ERROR_JOB: values = splitter.read_binary_chunk(input_file, start_index, chunk_size) engine = hamming_mapreduce.Engine(values, HammingErrorMap, HammingErrorReduce, mapreduce.Partition, no_of_reducers, start_index) Worker.engine = engine map_output = engine.hamming_error_map_phase() elif job_name == Jobs.HAMMING_CHECK_JOB: values = splitter.read_binary_chunk(input_file, start_index, chunk_size) engine = hamming_mapreduce.Engine(values, HammingCheckMap, HammingCheckReduce, mapreduce.Partition, no_of_reducers, start_index) Worker.engine = engine map_output = engine.hamming_check_map_phase() elif job_name == Jobs.HAMMING_FIX_JOB: values = splitter.read_binary_chunk(input_file, start_index, 
chunk_size) engine = hamming_mapreduce.Engine(values, HammingFixMap, HammingFixReduce, mapreduce.Partition, no_of_reducers, start_index) Worker.engine = engine map_output = engine.hamming_fix_map_phase() else: print "Invalid Job Name ............." Worker.engine = engine # elif job_name == Jobs.HAMMING_ENCODE_JOB: return map_output
# NOTE(review): interior of a driver function whose `def` is outside this
# view — `job_name`, `input_file`, `chunk_size`, `no_of_reducers` and
# `completed_jobs` come from that enclosing scope. TODO confirm against the
# full file.
splits = splitter.split_file(input_file, chunk_size)
engine = None


def silentremove(filename):
    # Best-effort delete: ignore any OSError (the errno.ENOENT filter was
    # deliberately commented out by the author, so ALL OSErrors are swallowed).
    try:
        os.remove(filename)
    except OSError as e:  # this would be "except OSError, e:" before Python 2.6
        # if e.errno != errno.ENOENT:  # errno.ENOENT = no such file or directory
        pass  # re-raise exception if a different error occurred


# Map Phase: run the map step sequentially over every chunk of the input.
for start_index in splits:
    # Word-count and sorting read word-aligned chunks; everything else reads
    # raw byte chunks.
    if job_name == jobs.Jobs.SORTING_JOB or job_name == jobs.Jobs.WORD_COUNT_JOB:
        values = splitter.read_chunk_by_word(input_file, start_index, chunk_size)
    else:
        values = splitter.read_chunk(input_file, start_index, chunk_size)
    if (job_name == jobs.Jobs.WORD_COUNT_JOB):
        engine = mapreduce.Engine(values, jobs.WordCountMap,
                                  jobs.WordCountReduce, mapreduce.Partition,
                                  no_of_reducers)
    elif (job_name == jobs.Jobs.SORTING_JOB):
        engine = mapreduce.Engine(values, jobs.SortingMap, jobs.SortingReduce,
                                  mapreduce.Partition, no_of_reducers)
    # NOTE(review): for any other job_name, `engine` is still None here and
    # the next line raises AttributeError — verify callers only pass the two
    # jobs handled above.
    map_output = engine.map_phase()
    # Keyed by chunk offset so results can be reassembled in order.
    completed_jobs[start_index] = map_output
    print "------------- Completed Map Job_ID: {0}--------------".format(
        start_index)
# NOTE(review): near-duplicate of the map-phase driver above, again interior
# of a function whose `def` is outside this view; `output_file` and
# `reducer_list` also come from that unseen scope — TODO confirm.
splits = splitter.split_file(input_file, chunk_size)
engine = None


def silentremove(filename):
    # Best-effort delete: ignore any OSError (the errno.ENOENT filter was
    # deliberately commented out by the author, so ALL OSErrors are swallowed).
    try:
        os.remove(filename)
    except OSError as e:  # this would be "except OSError, e:" before Python 2.6
        # if e.errno != errno.ENOENT:  # errno.ENOENT = no such file or directory
        pass  # re-raise exception if a different error occurred


# Map Phase: run the map step sequentially over every chunk of the input.
for start_index in splits:
    # Word-count and sorting read word-aligned chunks; everything else reads
    # raw byte chunks.
    if job_name == jobs.Jobs.SORTING_JOB or job_name == jobs.Jobs.WORD_COUNT_JOB:
        values = splitter.read_chunk_by_word(input_file, start_index, chunk_size)
    else:
        values = splitter.read_chunk(input_file, start_index, chunk_size)
    if job_name == jobs.Jobs.WORD_COUNT_JOB:
        engine = mapreduce.Engine(values, jobs.WordCountMap,
                                  jobs.WordCountReduce, mapreduce.Partition,
                                  no_of_reducers)
    elif job_name == jobs.Jobs.SORTING_JOB:
        engine = mapreduce.Engine(values, jobs.SortingMap, jobs.SortingReduce,
                                  mapreduce.Partition, no_of_reducers)
    # NOTE(review): `engine` remains None for any other job_name, so this
    # call would raise AttributeError — verify callers.
    map_output = engine.map_phase()
    # Keyed by chunk offset so results can be reassembled in order.
    completed_jobs[start_index] = map_output
    print "------------- Completed Map Job_ID: {0}--------------".format(start_index)

# Remove any stale output before the reduce phase writes a fresh one.
silentremove(output_file)

# Shuffle and Reduce Phase — loop body continues beyond this view.
for reducer_id in reducer_list: