def parse_chunk(file_name, temp_dir, start, end, job_id):
    """Parse one chunk of a log file and pickle each resulting counter.

    The chunk is obtained from ``seek_open(file_name, start, end)``; every
    line it yields is fed to a fresh ``LogMapper``. Each counter returned by
    the mapper is then pickled to its own file named
    ``<temp_dir>/map_<job_id>_<idx>``, where ``idx`` is the position of the
    counter's name in the module-level ``count_names`` sequence.

    Args:
        file_name: Path of the log file, passed through to ``seek_open``.
        temp_dir: Directory in which the pickled counters are written.
        start: Chunk start, passed through to ``seek_open`` (presumably a
            byte offset -- confirm against ``seek_open``).
        end: Chunk end, passed through to ``seek_open``.
        job_id: Integer identifying this chunk; used in the output file
            names and returned unchanged.

    Returns:
        ``job_id``, so the caller can tell which chunk has finished.

    Raises:
        ValueError: If the mapper produces a counter name that is not in
            ``count_names`` (raised by ``count_names.index``).
    """
    # NOTE(review): the object returned by seek_open is never closed here;
    # its type is not visible in this block, so cleanup is left to the
    # helper/garbage collector -- confirm whether it needs an explicit close.
    f = seek_open(file_name, start, end)
    mapper = LogMapper()
    for line in f:
        mapper.parse_line(line)

    for (count_name, counter) in mapper.get_counters().iteritems():
        out_path = "%s/map_%d_%d" % (
            temp_dir, job_id, count_names.index(count_name))
        # Close (and therefore flush) each output file deterministically
        # instead of relying on the garbage collector to do it.
        with open(out_path, 'wb') as out_file:
            pickle.dump(counter, out_file)
    return job_id
def parse_file(data_file):
    """Parse an entire log file and return the mapper's counters.

    Every line of ``data_file`` is fed to a fresh ``LogMapper`` via
    ``parse_line``.

    Args:
        data_file: Path of the log file to read (opened in text mode).

    Returns:
        Whatever ``LogMapper.get_counters()`` returns after all lines have
        been parsed.
    """
    mapper = LogMapper()
    # Use a context manager so the file is closed even if parsing raises.
    with open(data_file) as log_file:
        for line in log_file:
            mapper.parse_line(line)
    return mapper.get_counters()