def map_function(self, job_name, chunk_size, input_file, no_of_reducers, start_index):
    """Read this worker's input split, build the MapReduce engine that
    matches *job_name*, publish it on ``Worker.engine`` (presumably for the
    later reduce phase -- confirm against the reducer code), and run the
    job's map phase.

    Parameters:
        job_name       -- one of the ``Jobs.*`` constants selecting the job
        chunk_size     -- size of the split to read from *input_file*
        input_file     -- path of the input file the split is read from
        no_of_reducers -- partition count handed to the engine
        start_index    -- offset of this worker's split inside *input_file*

    Returns the map-phase output, or ``""`` for an unknown job name.
    """
    # Helpers hide the two engine signatures: the plain MapReduce engine
    # vs. the Hamming engine, which additionally takes start_index.
    def _plain_engine(values, map_cls, reduce_cls):
        return mapreduce.Engine(values, map_cls, reduce_cls,
                                mapreduce.Partition, no_of_reducers)

    def _hamming_engine(values, map_cls, reduce_cls):
        return hamming_mapreduce.Engine(values, map_cls, reduce_cls,
                                        mapreduce.Partition, no_of_reducers,
                                        start_index)

    # Dispatch table: job -> (chunk reader, engine factory, map-phase runner).
    # Word-oriented jobs read word-aligned chunks; HAMMING_ENCODE reads a raw
    # chunk; the remaining Hamming jobs read binary chunks.
    dispatch = {
        Jobs.WORD_COUNT_JOB: (
            splitter.read_chunk_by_word,
            lambda v: _plain_engine(v, WordCountMap, WordCountReduce),
            lambda e: e.map_phase()),
        Jobs.SORTING_JOB: (
            splitter.read_chunk_by_word,
            lambda v: _plain_engine(v, SortingMap, SortingReduce),
            lambda e: e.map_phase()),
        Jobs.HAMMING_ENCODE_JOB: (
            splitter.read_chunk,
            lambda v: _hamming_engine(v, HammingEncodingMap, HammingEncodingReduce),
            lambda e: e.hamming_encode_map_phase()),
        Jobs.HAMMING_DECODE_JOB: (
            splitter.read_binary_chunk,
            lambda v: _hamming_engine(v, HammingDecodingMap, HammingDecodingReduce),
            lambda e: e.hamming_decode_map_phase()),
        Jobs.HAMMING_ERROR_JOB: (
            splitter.read_binary_chunk,
            lambda v: _hamming_engine(v, HammingErrorMap, HammingErrorReduce),
            lambda e: e.hamming_error_map_phase()),
        Jobs.HAMMING_CHECK_JOB: (
            splitter.read_binary_chunk,
            lambda v: _hamming_engine(v, HammingCheckMap, HammingCheckReduce),
            lambda e: e.hamming_check_map_phase()),
        Jobs.HAMMING_FIX_JOB: (
            splitter.read_binary_chunk,
            lambda v: _hamming_engine(v, HammingFixMap, HammingFixReduce),
            lambda e: e.hamming_fix_map_phase()),
    }

    if job_name not in dispatch:
        # BUG FIX: the original also executed `Worker.engine = engine` on
        # this path, raising NameError because no engine was ever built here.
        print("Invalid Job Name .............")
        return ""

    read_chunk, make_engine, run_map = dispatch[job_name]
    values = read_chunk(input_file, start_index, chunk_size)
    engine = make_engine(values)
    Worker.engine = engine
    return run_map(engine)
class SortingMap(mapreduce.Map):
    """Map step for the sorting job: emit every word of the chunk under the
    single key '1' so one reducer receives the complete word list."""

    def map(self, k, v):
        for token in v.split():
            self.emit('1', token)


class SortingReduce(mapreduce.Reduce):
    """Reduce step: sort the collected words in place and emit them in
    ascending order."""

    def reduce(self, k, vlist):
        vlist.sort()
        for item in vlist:
            self.emit(item)


if __name__ == '__main__':
    # Small self-test: sort the first words of a local source file.
    values = splitter.read_chunk("wordcount.py", 1, 100)
    engine = mapreduce.Engine(values, SortingMap, SortingReduce,
                              mapreduce.Partition, 2)
    map_output = engine.map_phase()
    for partition in map_output:
        for r in engine.reduce_phase(partition):
            print(r)
        print("------------------------------------------------------------------------------------------")
def silentremove(filename):
    """Remove *filename*, ignoring only the file-not-found case.

    BUG FIX: the original suppressed *every* OSError (the errno check its
    own comment described was commented out), hiding real failures such as
    permission errors.  Any OSError other than ENOENT is now re-raised.
    """
    import errno
    try:
        os.remove(filename)
    except OSError as e:
        if e.errno != errno.ENOENT:  # errno.ENOENT = no such file or directory
            raise


# Map Phase: run the map step over every input split sequentially.
for start_index in splits:
    # Word-oriented jobs split the input on word boundaries; everything
    # else reads a raw chunk.
    if job_name == jobs.Jobs.SORTING_JOB or job_name == jobs.Jobs.WORD_COUNT_JOB:
        values = splitter.read_chunk_by_word(input_file, start_index, chunk_size)
    else:
        values = splitter.read_chunk(input_file, start_index, chunk_size)
    if job_name == jobs.Jobs.WORD_COUNT_JOB:
        engine = mapreduce.Engine(values, jobs.WordCountMap, jobs.WordCountReduce,
                                  mapreduce.Partition, no_of_reducers)
    elif job_name == jobs.Jobs.SORTING_JOB:
        engine = mapreduce.Engine(values, jobs.SortingMap, jobs.SortingReduce,
                                  mapreduce.Partition, no_of_reducers)
    # NOTE(review): for any other job_name no engine is built in this loop,
    # so the call below would reuse a stale engine or raise NameError --
    # confirm this script is only invoked for WORD_COUNT_JOB / SORTING_JOB.
    map_output = engine.map_phase()
    completed_jobs[start_index] = map_output
    print("------------- Completed Map Job_ID: {0}--------------".format(start_index))

# Drop any previous output before the reduce phase rewrites it.
silentremove(output_file)
if (err_position != 0): if (hamming_str[err_position - 1] == '0'): print "fix error on position: ", err_position hamming_str[err_position - 1] = '1' else: hamming_str[err_position - 1] = '0' self.emit(k, ''.join(hamming_str)) class HammingFixReduce(hamming_mapreduce.Reduce): def reduce(self, k, vlist): self.emit(''.join(vlist)) if __name__ == '__main__': values = splitter.read_chunk("Hello.txt", 1, 2) print values # print "---------------------------------------------------" engine = hamming_mapreduce.Engine(values, HammingEncodingMap, HammingEncodingReduce, hamming_mapreduce.Partition, 1, 0) ## engine need index map_output = engine.hamming_encode_map_phase() encode_string = '' for partition in map_output: result_list = engine.reduce_phase(partition) for r in result_list: encode_string += r # print "------------------------------------------------------------------------------------------" # print encode_string outfile = open("encode_binary", 'w')
def silentremove(filename):
    """Remove *filename*, ignoring only the file-not-found case.

    BUG FIX: the original suppressed *every* OSError (the errno check its
    own comment described was commented out), hiding real failures such as
    permission errors.  Any OSError other than ENOENT is now re-raised.
    """
    import errno
    try:
        os.remove(filename)
    except OSError as e:
        if e.errno != errno.ENOENT:  # errno.ENOENT = no such file or directory
            raise


# Map Phase: run the map step over every input split sequentially.
for start_index in splits:
    # Word-oriented jobs split the input on word boundaries; everything
    # else reads a raw chunk.
    if job_name == jobs.Jobs.SORTING_JOB or job_name == jobs.Jobs.WORD_COUNT_JOB:
        values = splitter.read_chunk_by_word(input_file, start_index, chunk_size)
    else:
        values = splitter.read_chunk(input_file, start_index, chunk_size)
    if job_name == jobs.Jobs.WORD_COUNT_JOB:
        engine = mapreduce.Engine(values, jobs.WordCountMap, jobs.WordCountReduce,
                                  mapreduce.Partition, no_of_reducers)
    elif job_name == jobs.Jobs.SORTING_JOB:
        engine = mapreduce.Engine(values, jobs.SortingMap, jobs.SortingReduce,
                                  mapreduce.Partition, no_of_reducers)
    # NOTE(review): for any other job_name no engine is built in this loop,
    # so the call below would reuse a stale engine or raise NameError --
    # confirm this script is only invoked for WORD_COUNT_JOB / SORTING_JOB.
    map_output = engine.map_phase()
    completed_jobs[start_index] = map_output
    print("------------- Completed Map Job_ID: {0}--------------".format(start_index))

# Drop any previous output before the reduce phase rewrites it.
silentremove(output_file)

# Shuffle and Reduce Phase
# NOTE(review): this loop's body continues beyond the visible chunk; only
# the visible statement is reproduced here.
for reducer_id in reducer_list:
    shuffle_data = shuffle_info(reducer_id, completed_jobs)
class SortingMap(mapreduce.Map):
    """Map step of the sort job: key each word of the chunk under '1' so a
    single reducer sees the whole word list."""

    def map(self, k, v):
        words = v.split()
        for word in words:
            self.emit('1', word)


class SortingReduce(mapreduce.Reduce):
    """Reduce step of the sort job: sort the word list in place, then emit
    the words one at a time."""

    def reduce(self, k, vlist):
        vlist.sort()
        for value in vlist:
            self.emit(value)


if __name__ == '__main__':
    # Demo run: sort the first 100 units of a local source file.
    chunk = splitter.read_chunk("wordcount.py", 1, 100)
    engine = mapreduce.Engine(chunk, SortingMap, SortingReduce,
                              mapreduce.Partition, 2)
    for partition in engine.map_phase():
        result_list = engine.reduce_phase(partition)
        for r in result_list:
            print(r)
        print("------------------------------------------------------------------------------------------")
if(hamming_str[err_position-1]=='0'): print "fix error on position: ",err_position hamming_str[err_position-1] = '1' else: hamming_str[err_position-1] = '0' self.emit(k,''.join(hamming_str)) class HammingFixReduce(hamming_mapreduce.Reduce): def reduce(self, k, vlist): self.emit(''.join(vlist)) if __name__ == '__main__': values = splitter.read_chunk("Hello.txt", 1, 2) print values # print "---------------------------------------------------" engine = hamming_mapreduce.Engine(values, HammingEncodingMap, HammingEncodingReduce, hamming_mapreduce.Partition,1 , 0) ## engine need index map_output = engine.hamming_encode_map_phase() encode_string = '' for partition in map_output: result_list = engine.reduce_phase(partition); for r in result_list: encode_string+=r # print "------------------------------------------------------------------------------------------" # print encode_string outfile = open("encode_binary", 'w') outfile.write(encode_string) outfile.close()