def run_mapper(self, index):
    """Run the implemented mapper on one input split.

    The split file for ``index`` is read in full: its first line (with the
    line terminator left in place, as returned by ``readline``) is passed to
    ``self.mapper`` as the key and the rest of the file as the value. One
    temporary map file is then written per reducer, holding as JSON the
    pairs for which ``self.check_position(key, reducer_index)`` is true.

    When ``self.clean`` is truthy the input split file is deleted once it
    has been read.

    :param index: the index of the input split to process; also used to
        name the temporary map files.
    """
    split_path = settings.get_input_split_file(index)
    # The context manager closes the file even if a read fails.
    with open(split_path, "r") as input_split_file:
        key = input_split_file.readline()
        value = input_split_file.read()
    if self.clean:
        os.unlink(split_path)

    # Materialise the result: it is scanned once per reducer, so a
    # generator-based mapper would otherwise be exhausted after the
    # first reducer and leave every later map file empty.
    mapper_result = list(self.mapper(key, value))

    for reducer_index in range(self.n_reducers):
        # Distinct loop names avoid shadowing the split's key/value.
        partition = [(k, v) for (k, v) in mapper_result
                     if self.check_position(k, reducer_index)]
        temp_map_path = settings.get_temp_map_file(index, reducer_index)
        with open(temp_map_path, "w+") as temp_map_file:
            json.dump(partition, temp_map_file)
def begin_file_split(self, split_index, index):
    """Open a new input split file and write its index as the first line.

    :param split_index: the split index we are currently on; the file is
        named using ``split_index - 1``.
    :param index: the index given to the file, written as its first line.
    :return: the file object, still open in ``"w+"`` mode; the caller is
        responsible for closing it.
    """
    split_path = settings.get_input_split_file(split_index - 1)
    handle = open(split_path, "w+")
    header_line = str(index) + "\n"
    handle.write(header_line)
    return handle
def initiate_file_split(self, split_index, index):
    """Create a split file and seed it with its index line.

    The file path comes from ``settings.get_input_split_file`` called with
    ``split_index - 1``. The file is opened in ``"w+"`` mode and
    ``str(index)`` followed by a newline is written to it.

    :param split_index: the split index we are currently on, to be used
        for naming the file.
    :param index: the index given to the file.
    :return: the open file object; it is not closed here.
    """
    file_number = split_index - 1
    out = open(settings.get_input_split_file(file_number), "w+")
    first_line = str(index) + "\n"
    out.write(first_line)
    return out
def run_mapper(self, index):
    """Run the implemented mapper and partition its output per reducer.

    Reads the input split file selected by ``index``: the first line
    (including its line terminator, as ``readline`` returns it) becomes the
    key and everything after it the value handed to ``self.mapper``. For
    each of the ``self.n_reducers`` reducers, the pairs accepted by
    ``self.check_position(key, reducer_index)`` are dumped as JSON into that
    reducer's temporary map file.

    If ``self.clean`` is truthy, the input split file is removed after it
    has been read.

    :param index: the index of the thread to run on; selects the input
        split file and names the temporary map files.
    """
    input_path = settings.get_input_split_file(index)
    # ``with`` guarantees the handle is released even when reading raises.
    with open(input_path, "r") as input_split_file:
        key = input_split_file.readline()
        value = input_split_file.read()
    if self.clean:
        os.unlink(input_path)

    # The result is iterated once per reducer; turning it into a list keeps
    # a generator-returning mapper from being consumed by the first reducer.
    mapper_result = list(self.mapper(key, value))

    for reducer_index in range(self.n_reducers):
        # Separate names so the comprehension does not shadow key/value.
        selected = [
            (out_key, out_value)
            for (out_key, out_value) in mapper_result
            if self.check_position(out_key, reducer_index)
        ]
        with open(settings.get_temp_map_file(index, reducer_index),
                  "w+") as temp_map_file:
            json.dump(selected, temp_map_file)