def map_count(channel, count_name, temp_dir):
    # Reduce phase: merge the per-chunk pickled counters for one count name
    # into a single total, deleting each temp file once it has been folded in.
    total = LogCounter(count_name)
    count = 0
    job_id = channel.receive()
    while job_id is not None:
        temp_file_name = "%s/map_%d_%d" % (temp_dir, job_id, count_names.index(count_name))
        total.add_counter(pickle.load(open(temp_file_name, 'rb')))
        count += 1
        os.remove(temp_file_name)
        job_id = channel.receive()
    # A received None marks the end of the job stream: pickle the aggregate
    # report to the output file and send the result back over the channel.
    channel.send(pickle_to_file(total.report(),
                                "%s/out_%d" % (temp_dir, count_names.index(count_name))))
def parse_chunk(file_name, temp_dir, start, end, job_id):
    # Map phase: parse one byte range of the log file, then pickle each of the
    # mapper's counters to a temp file keyed by job id and counter index.
    start_time = time.time()
    f = seek_open(file_name, start, end)
    mapper = LogMapper()
    for line in f:
        mapper.parse_line(line)
    for (count_name, counter) in mapper.get_counters().iteritems():
        pickle.dump(counter,
                    open("%s/map_%d_%d" % (temp_dir, job_id, count_names.index(count_name)), 'wb'))
    return job_id
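
# --- Local driver sketch (illustrative, not part of the original pipeline) ---
# A minimal sketch of how parse_chunk and map_count might be wired together,
# assuming the module-level count_names list and the LogMapper/LogCounter,
# seek_open, and pickle_to_file helpers defined elsewhere. In the real setup
# the channel would come from the worker framework; LocalChannel below is a
# hypothetical stand-in that feeds job ids from an in-memory queue.
from collections import deque

class LocalChannel(object):
    """Hypothetical channel stub: receive() pops queued items, send() keeps the result."""
    def __init__(self, items):
        self._items = deque(items)
        self.result = None

    def receive(self):
        return self._items.popleft()

    def send(self, value):
        self.result = value

def run_local(file_name, temp_dir, chunk_bounds):
    # Map phase: parse each (start, end) byte range of the log file.
    job_ids = []
    for job_id, (start, end) in enumerate(chunk_bounds):
        job_ids.append(parse_chunk(file_name, temp_dir, start, end, job_id))
    # Reduce phase: aggregate one counter per pass; None signals end of input.
    outputs = []
    for count_name in count_names:
        channel = LocalChannel(job_ids + [None])
        map_count(channel, count_name, temp_dir)
        outputs.append(channel.result)
    return outputs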