def map_function(self, job_name, chunk_size, input_file, no_of_reducers, start_index):
    """Read this worker's input split, build the MapReduce engine that
    matches *job_name*, publish it on ``Worker.engine`` (presumably for the
    later reduce phase -- confirm against the reducer code), and run the
    job's map phase.

    Parameters:
        job_name       -- one of the ``Jobs.*`` constants selecting the job
        chunk_size     -- size of the split to read from *input_file*
        input_file     -- path of the input file the split is read from
        no_of_reducers -- partition count handed to the engine
        start_index    -- offset of this worker's split inside *input_file*

    Returns the map-phase output, or ``""`` for an unknown job name.
    """
    # Helpers hide the two engine signatures: the plain MapReduce engine
    # vs. the Hamming engine, which additionally takes start_index.
    def _plain_engine(values, map_cls, reduce_cls):
        return mapreduce.Engine(values, map_cls, reduce_cls,
                                mapreduce.Partition, no_of_reducers)

    def _hamming_engine(values, map_cls, reduce_cls):
        return hamming_mapreduce.Engine(values, map_cls, reduce_cls,
                                        mapreduce.Partition, no_of_reducers,
                                        start_index)

    # Dispatch table: job -> (chunk reader, engine factory, map-phase runner).
    # Word-oriented jobs read word-aligned chunks; HAMMING_ENCODE reads a raw
    # chunk; the remaining Hamming jobs read binary chunks.
    dispatch = {
        Jobs.WORD_COUNT_JOB: (
            splitter.read_chunk_by_word,
            lambda v: _plain_engine(v, WordCountMap, WordCountReduce),
            lambda e: e.map_phase()),
        Jobs.SORTING_JOB: (
            splitter.read_chunk_by_word,
            lambda v: _plain_engine(v, SortingMap, SortingReduce),
            lambda e: e.map_phase()),
        Jobs.HAMMING_ENCODE_JOB: (
            splitter.read_chunk,
            lambda v: _hamming_engine(v, HammingEncodingMap, HammingEncodingReduce),
            lambda e: e.hamming_encode_map_phase()),
        Jobs.HAMMING_DECODE_JOB: (
            splitter.read_binary_chunk,
            lambda v: _hamming_engine(v, HammingDecodingMap, HammingDecodingReduce),
            lambda e: e.hamming_decode_map_phase()),
        Jobs.HAMMING_ERROR_JOB: (
            splitter.read_binary_chunk,
            lambda v: _hamming_engine(v, HammingErrorMap, HammingErrorReduce),
            lambda e: e.hamming_error_map_phase()),
        Jobs.HAMMING_CHECK_JOB: (
            splitter.read_binary_chunk,
            lambda v: _hamming_engine(v, HammingCheckMap, HammingCheckReduce),
            lambda e: e.hamming_check_map_phase()),
        Jobs.HAMMING_FIX_JOB: (
            splitter.read_binary_chunk,
            lambda v: _hamming_engine(v, HammingFixMap, HammingFixReduce),
            lambda e: e.hamming_fix_map_phase()),
    }

    if job_name not in dispatch:
        # BUG FIX: the original also executed `Worker.engine = engine` on
        # this path, raising NameError because no engine was ever built here.
        print("Invalid Job Name .............")
        return ""

    read_chunk, make_engine, run_map = dispatch[job_name]
    values = read_chunk(input_file, start_index, chunk_size)
    engine = make_engine(values)
    Worker.engine = engine
    return run_map(engine)
class SortingMap(mapreduce.Map):
    """Map step for the sorting job: emit every word of the chunk under the
    single key '1' so one reducer receives the complete word list."""

    def map(self, k, v):
        for token in v.split():
            self.emit('1', token)


class SortingReduce(mapreduce.Reduce):
    """Reduce step: sort the collected words in place and emit them in
    ascending order."""

    def reduce(self, k, vlist):
        vlist.sort()
        for item in vlist:
            self.emit(item)


if __name__ == '__main__':
    # Small self-test: sort the first words of a local source file.
    values = splitter.read_chunk("wordcount.py", 1, 100)
    engine = mapreduce.Engine(values, SortingMap, SortingReduce,
                              mapreduce.Partition, 2)
    map_output = engine.map_phase()
    for partition in map_output:
        for r in engine.reduce_phase(partition):
            print(r)
        print("------------------------------------------------------------------------------------------")
def silentremove(filename):
    """Remove *filename*, ignoring only the file-not-found case.

    BUG FIX: the original suppressed *every* OSError (the errno check its
    own comment described was commented out), hiding real failures such as
    permission errors.  Any OSError other than ENOENT is now re-raised.
    """
    import errno
    try:
        os.remove(filename)
    except OSError as e:
        if e.errno != errno.ENOENT:  # errno.ENOENT = no such file or directory
            raise


# Map Phase: run the map step over every input split sequentially.
for start_index in splits:
    # Word-oriented jobs split the input on word boundaries; everything
    # else reads a raw chunk.
    if job_name == jobs.Jobs.SORTING_JOB or job_name == jobs.Jobs.WORD_COUNT_JOB:
        values = splitter.read_chunk_by_word(input_file, start_index, chunk_size)
    else:
        values = splitter.read_chunk(input_file, start_index, chunk_size)
    if job_name == jobs.Jobs.WORD_COUNT_JOB:
        engine = mapreduce.Engine(values, jobs.WordCountMap, jobs.WordCountReduce,
                                  mapreduce.Partition, no_of_reducers)
    elif job_name == jobs.Jobs.SORTING_JOB:
        engine = mapreduce.Engine(values, jobs.SortingMap, jobs.SortingReduce,
                                  mapreduce.Partition, no_of_reducers)
    # NOTE(review): for any other job_name no engine is built in this loop,
    # so the call below would reuse a stale engine or raise NameError --
    # confirm this script is only invoked for WORD_COUNT_JOB / SORTING_JOB.
    map_output = engine.map_phase()
    completed_jobs[start_index] = map_output
    print("------------- Completed Map Job_ID: {0}--------------".format(start_index))

# Drop any previous output before the reduce phase rewrites it.
silentremove(output_file)
if (err_position != 0): if (hamming_str[err_position - 1] == '0'): print "fix error on position: ", err_position hamming_str[err_position - 1] = '1' else: hamming_str[err_position - 1] = '0' self.emit(k, ''.join(hamming_str)) class HammingFixReduce(hamming_mapreduce.Reduce): def reduce(self, k, vlist): self.emit(''.join(vlist)) if __name__ == '__main__': values = splitter.read_chunk("Hello.txt", 1, 2) print values # print "---------------------------------------------------" engine = hamming_mapreduce.Engine(values, HammingEncodingMap, HammingEncodingReduce, hamming_mapreduce.Partition, 1, 0) ## engine need index map_output = engine.hamming_encode_map_phase() encode_string = '' for partition in map_output: result_list = engine.reduce_phase(partition) for r in result_list: encode_string += r # print "------------------------------------------------------------------------------------------" # print encode_string outfile = open("encode_binary", 'w')
def silentremove(filename):
    """Remove *filename*, ignoring only the file-not-found case.

    BUG FIX: the original suppressed *every* OSError (the errno check its
    own comment described was commented out), hiding real failures such as
    permission errors.  Any OSError other than ENOENT is now re-raised.
    """
    import errno
    try:
        os.remove(filename)
    except OSError as e:
        if e.errno != errno.ENOENT:  # errno.ENOENT = no such file or directory
            raise


# Map Phase: run the map step over every input split sequentially.
for start_index in splits:
    # Word-oriented jobs split the input on word boundaries; everything
    # else reads a raw chunk.
    if job_name == jobs.Jobs.SORTING_JOB or job_name == jobs.Jobs.WORD_COUNT_JOB:
        values = splitter.read_chunk_by_word(input_file, start_index, chunk_size)
    else:
        values = splitter.read_chunk(input_file, start_index, chunk_size)
    if job_name == jobs.Jobs.WORD_COUNT_JOB:
        engine = mapreduce.Engine(values, jobs.WordCountMap, jobs.WordCountReduce,
                                  mapreduce.Partition, no_of_reducers)
    elif job_name == jobs.Jobs.SORTING_JOB:
        engine = mapreduce.Engine(values, jobs.SortingMap, jobs.SortingReduce,
                                  mapreduce.Partition, no_of_reducers)
    # NOTE(review): for any other job_name no engine is built in this loop,
    # so the call below would reuse a stale engine or raise NameError --
    # confirm this script is only invoked for WORD_COUNT_JOB / SORTING_JOB.
    map_output = engine.map_phase()
    completed_jobs[start_index] = map_output
    print("------------- Completed Map Job_ID: {0}--------------".format(start_index))

# Drop any previous output before the reduce phase rewrites it.
silentremove(output_file)

# Shuffle and Reduce Phase
# NOTE(review): this loop's body continues beyond the visible chunk; only
# the visible statement is reproduced here.
for reducer_id in reducer_list:
    shuffle_data = shuffle_info(reducer_id, completed_jobs)
class SortingMap(mapreduce.Map):
    """Map step of the sort job: key each word of the chunk under '1' so a
    single reducer sees the whole word list."""

    def map(self, k, v):
        words = v.split()
        for word in words:
            self.emit('1', word)


class SortingReduce(mapreduce.Reduce):
    """Reduce step of the sort job: sort the word list in place, then emit
    the words one at a time."""

    def reduce(self, k, vlist):
        vlist.sort()
        for value in vlist:
            self.emit(value)


if __name__ == '__main__':
    # Demo run: sort the first 100 units of a local source file.
    chunk = splitter.read_chunk("wordcount.py", 1, 100)
    engine = mapreduce.Engine(chunk, SortingMap, SortingReduce,
                              mapreduce.Partition, 2)
    for partition in engine.map_phase():
        result_list = engine.reduce_phase(partition)
        for r in result_list:
            print(r)
        print("------------------------------------------------------------------------------------------")
if(hamming_str[err_position-1]=='0'): print "fix error on position: ",err_position hamming_str[err_position-1] = '1' else: hamming_str[err_position-1] = '0' self.emit(k,''.join(hamming_str)) class HammingFixReduce(hamming_mapreduce.Reduce): def reduce(self, k, vlist): self.emit(''.join(vlist)) if __name__ == '__main__': values = splitter.read_chunk("Hello.txt", 1, 2) print values # print "---------------------------------------------------" engine = hamming_mapreduce.Engine(values, HammingEncodingMap, HammingEncodingReduce, hamming_mapreduce.Partition,1 , 0) ## engine need index map_output = engine.hamming_encode_map_phase() encode_string = '' for partition in map_output: result_list = engine.reduce_phase(partition); for r in result_list: encode_string+=r # print "------------------------------------------------------------------------------------------" # print encode_string outfile = open("encode_binary", 'w') outfile.write(encode_string) outfile.close()