def parse_chunk(file_name, start, end):
    """Parse one slice of a log file and report who parsed it and how long it took.

    Every line yielded by ``seek_open(file_name, start, end)`` is fed to a
    fresh ``LogMapper`` through ``parse_line``.

    Args:
        file_name: Forwarded unchanged to ``seek_open``.
        start: Forwarded unchanged to ``seek_open`` (presumably the offset
            where the chunk begins -- confirm against ``seek_open``).
        end: Forwarded unchanged to ``seek_open`` (presumably the offset
            where the chunk stops -- confirm against ``seek_open``).

    Returns:
        A ``(mapper, pid, elapsed)`` tuple: the populated ``LogMapper``, the
        ``os.getpid()`` of the process that ran this call, and the elapsed
        wall-clock time in seconds.
    """
    # time.time() instead of time.clock(): clock() means CPU time on Unix but
    # wall time on Windows, and it no longer exists on Python 3.8+.
    started_at = time.time()
    # NOTE(review): the object returned by seek_open is never closed here;
    # whether it needs closing depends on seek_open -- confirm.
    chunk = seek_open(file_name, start, end)
    mapper = LogMapper()
    for line in chunk:
        mapper.parse_line(line)
    return (mapper, os.getpid(), time.time() - started_at)
def parse_chunk(file_name, temp_dir, start, end):
    """Parse one slice of a log file and pickle the resulting mapper to disk.

    Every line yielded by ``seek_open(file_name, start, end)`` is fed to a
    fresh ``LogMapper``; the mapper is then pickled to ``<temp_dir>/<start>``.

    Args:
        file_name: Forwarded unchanged to ``seek_open``.
        temp_dir: Directory the pickle file is written into. It is not
            created here, so it must already exist.
        start: Forwarded to ``seek_open`` and also used (formatted with
            ``%d``) as the name of the output file.
        end: Forwarded unchanged to ``seek_open``.

    Returns:
        A ``(out_file_name, pid, elapsed)`` tuple: the path of the pickle
        file, the ``os.getpid()`` of the process that ran this call, and the
        elapsed wall-clock time in seconds (including the pickling).
    """
    start_time = time.time()
    # NOTE(review): the object returned by seek_open is never closed here;
    # whether it needs closing depends on seek_open -- confirm.
    f = seek_open(file_name, start, end)
    mapper = LogMapper()
    for line in f:
        mapper.parse_line(line)
    out_file_name = "%s/%d" % (temp_dir, start)
    # Close the output explicitly so the pickle is fully flushed to disk
    # before its path is handed back to the caller.
    with open(out_file_name, 'wb') as out_file:
        pickle.dump(mapper, out_file)
    return (out_file_name, os.getpid(), time.time() - start_time)
def parse_chunk(file_name, temp_dir, start, end, job_id):
    """Parse one slice of a log file and pickle each counter to its own file.

    Every line yielded by ``seek_open(file_name, start, end)`` is fed to a
    fresh ``LogMapper``. Each ``(name, counter)`` pair from
    ``mapper.get_counters()`` is then pickled to
    ``<temp_dir>/map_<job_id>_<index>``, where ``<index>`` is the position of
    the counter's name in ``count_names``.

    Args:
        file_name: Forwarded unchanged to ``seek_open``.
        temp_dir: Directory the pickle files are written into. It is not
            created here, so it must already exist.
        start: Forwarded unchanged to ``seek_open``.
        end: Forwarded unchanged to ``seek_open``.
        job_id: Integer embedded in every output file name and returned
            unchanged.

    Returns:
        ``job_id``, unchanged.

    Raises:
        ValueError: Presumably, if a counter name is missing from
            ``count_names`` (true when ``count_names`` is a list or tuple --
            it is defined outside this function, confirm).
    """
    # NOTE(review): the object returned by seek_open is never closed here;
    # whether it needs closing depends on seek_open -- confirm.
    chunk = seek_open(file_name, start, end)
    mapper = LogMapper()
    for line in chunk:
        mapper.parse_line(line)
    # items() works on both Python 2 and 3 dicts (iteritems() is Python 2
    # only); assumes get_counters() returns a dict-like object.
    for (count_name, counter) in mapper.get_counters().items():
        out_path = "%s/map_%d_%d" % (
            temp_dir, job_id, count_names.index(count_name))
        # Close each file explicitly so every pickle is flushed to disk
        # before job_id is reported back.
        with open(out_path, 'wb') as out_file:
            pickle.dump(counter, out_file)
    return job_id
def parse_file(data_file):
    """Parse an entire file with a single ``LogMapper`` and return its counters.

    Args:
        data_file: Path of the file to read; it is opened in the default
            text mode and iterated line by line.

    Returns:
        Whatever ``LogMapper.get_counters()`` returns after every line has
        been passed to ``parse_line``.
    """
    mapper = LogMapper()
    # Use a context manager so the input file is closed even if parse_line
    # raises part-way through.
    with open(data_file) as log_file:
        for line in log_file:
            mapper.parse_line(line)
    return mapper.get_counters()