def map_function(self, job_name, chunk_size, input_file, no_of_reducers, start_index): map_output = "" if job_name == Jobs.WORD_COUNT_JOB: values = splitter.read_chunk_by_word(input_file, start_index, chunk_size) engine = mapreduce.Engine(values, WordCountMap, WordCountReduce, mapreduce.Partition, no_of_reducers) Worker.engine = engine map_output = engine.map_phase() elif job_name == Jobs.SORTING_JOB: values = splitter.read_chunk_by_word(input_file, start_index, chunk_size) engine = mapreduce.Engine(values, SortingMap, SortingReduce, mapreduce.Partition, no_of_reducers) Worker.engine = engine map_output = engine.map_phase() elif job_name == Jobs.HAMMING_ENCODE_JOB: values = splitter.read_chunk(input_file, start_index, chunk_size) engine = hamming_mapreduce.Engine(values, HammingEncodingMap, HammingEncodingReduce, mapreduce.Partition, no_of_reducers, start_index) Worker.engine = engine map_output = engine.hamming_encode_map_phase() elif job_name == Jobs.HAMMING_DECODE_JOB: values = splitter.read_binary_chunk(input_file, start_index, chunk_size) engine = hamming_mapreduce.Engine(values, HammingDecodingMap, HammingDecodingReduce, mapreduce.Partition, no_of_reducers, start_index) Worker.engine = engine map_output = engine.hamming_decode_map_phase() elif job_name == Jobs.HAMMING_ERROR_JOB: values = splitter.read_binary_chunk(input_file, start_index, chunk_size) engine = hamming_mapreduce.Engine(values, HammingErrorMap, HammingErrorReduce, mapreduce.Partition, no_of_reducers, start_index) Worker.engine = engine map_output = engine.hamming_error_map_phase() elif job_name == Jobs.HAMMING_CHECK_JOB: values = splitter.read_binary_chunk(input_file, start_index, chunk_size) engine = hamming_mapreduce.Engine(values, HammingCheckMap, HammingCheckReduce, mapreduce.Partition, no_of_reducers, start_index) Worker.engine = engine map_output = engine.hamming_check_map_phase() elif job_name == Jobs.HAMMING_FIX_JOB: values = splitter.read_binary_chunk(input_file, start_index, 
chunk_size) engine = hamming_mapreduce.Engine(values, HammingFixMap, HammingFixReduce, mapreduce.Partition, no_of_reducers, start_index) Worker.engine = engine map_output = engine.hamming_fix_map_phase() else: print "Invalid Job Name ............." Worker.engine = engine # elif job_name == Jobs.HAMMING_ENCODE_JOB: return map_output
# NOTE(review): interior of a driver function whose `def` is outside this
# view — `job_name`, `input_file`, `chunk_size`, `no_of_reducers` and
# `completed_jobs` come from that enclosing scope. TODO confirm against the
# full file.
splits = splitter.split_file(input_file, chunk_size)
engine = None


def silentremove(filename):
    # Best-effort delete: ignore any OSError (the errno.ENOENT filter was
    # deliberately commented out by the author, so ALL OSErrors are swallowed).
    try:
        os.remove(filename)
    except OSError as e:  # this would be "except OSError, e:" before Python 2.6
        # if e.errno != errno.ENOENT:  # errno.ENOENT = no such file or directory
        pass  # re-raise exception if a different error occurred


# Map Phase: run the map step sequentially over every chunk of the input.
for start_index in splits:
    # Word-count and sorting read word-aligned chunks; everything else reads
    # raw byte chunks.
    if job_name == jobs.Jobs.SORTING_JOB or job_name == jobs.Jobs.WORD_COUNT_JOB:
        values = splitter.read_chunk_by_word(input_file, start_index, chunk_size)
    else:
        values = splitter.read_chunk(input_file, start_index, chunk_size)
    if (job_name == jobs.Jobs.WORD_COUNT_JOB):
        engine = mapreduce.Engine(values, jobs.WordCountMap,
                                  jobs.WordCountReduce, mapreduce.Partition,
                                  no_of_reducers)
    elif (job_name == jobs.Jobs.SORTING_JOB):
        engine = mapreduce.Engine(values, jobs.SortingMap, jobs.SortingReduce,
                                  mapreduce.Partition, no_of_reducers)
    # NOTE(review): for any other job_name, `engine` is still None here and
    # the next line raises AttributeError — verify callers only pass the two
    # jobs handled above.
    map_output = engine.map_phase()
    # Keyed by chunk offset so results can be reassembled in order.
    completed_jobs[start_index] = map_output
    print "------------- Completed Map Job_ID: {0}--------------".format(
        start_index)
# NOTE(review): near-duplicate of the map-phase driver above, again interior
# of a function whose `def` is outside this view; `output_file` and
# `reducer_list` also come from that unseen scope — TODO confirm.
splits = splitter.split_file(input_file, chunk_size)
engine = None


def silentremove(filename):
    # Best-effort delete: ignore any OSError (the errno.ENOENT filter was
    # deliberately commented out by the author, so ALL OSErrors are swallowed).
    try:
        os.remove(filename)
    except OSError as e:  # this would be "except OSError, e:" before Python 2.6
        # if e.errno != errno.ENOENT:  # errno.ENOENT = no such file or directory
        pass  # re-raise exception if a different error occurred


# Map Phase: run the map step sequentially over every chunk of the input.
for start_index in splits:
    # Word-count and sorting read word-aligned chunks; everything else reads
    # raw byte chunks.
    if job_name == jobs.Jobs.SORTING_JOB or job_name == jobs.Jobs.WORD_COUNT_JOB:
        values = splitter.read_chunk_by_word(input_file, start_index, chunk_size)
    else:
        values = splitter.read_chunk(input_file, start_index, chunk_size)
    if job_name == jobs.Jobs.WORD_COUNT_JOB:
        engine = mapreduce.Engine(values, jobs.WordCountMap,
                                  jobs.WordCountReduce, mapreduce.Partition,
                                  no_of_reducers)
    elif job_name == jobs.Jobs.SORTING_JOB:
        engine = mapreduce.Engine(values, jobs.SortingMap, jobs.SortingReduce,
                                  mapreduce.Partition, no_of_reducers)
    # NOTE(review): `engine` remains None for any other job_name, so this
    # call would raise AttributeError — verify callers.
    map_output = engine.map_phase()
    # Keyed by chunk offset so results can be reassembled in order.
    completed_jobs[start_index] = map_output
    print "------------- Completed Map Job_ID: {0}--------------".format(start_index)

# Remove any stale output before the reduce phase writes a fresh one.
silentremove(output_file)

# Shuffle and Reduce Phase — loop body continues beyond this view.
for reducer_id in reducer_list: