def aggregate_data(top_dir, file_pattern):
    '''Collect every figure the report needs from the matching log files.

    Given the directory path `top_dir` and the filename pattern
    `file_pattern`, load the log records through `apache_log_infos` and
    hand them to the per-metric helpers.

    Returns a `Data` namedtuple with the fields, in order:
        * total_sent               - result of `get_bytes`
        * cumulative_size_requests - result of `get_cumulative_size_request`
        * weekly_uniq_hosts        - result of `get_weekly_uniq_hosts`
        * status_404_requests      - result of `get_status_404_request`

    The report is meant to show how many bytes were sent, the largest
    cumulative size requests, the unique hosts per week and the
    404-status requests by frequency; the exact shape of each field is
    decided by the helpers, which are defined elsewhere.
    '''
    Data = namedtuple('Data', 'total_sent, cumulative_size_requests, \
                      weekly_uniq_hosts, status_404_requests')

    # The records are consumed by four helpers in turn.  apache_log_infos
    # may hand back a one-shot iterator (its definition is not visible
    # here), in which case only the first helper would see any data, so
    # materialise the records once up front.
    apache_data = list(apache_log_infos(top_dir, file_pattern))

    total_sent = get_bytes(apache_data)
    # Bind the helper results to the names that are actually returned;
    # previously they were stored under misspelled (singular) names and
    # the empty placeholder dicts were returned instead.
    cumulative_size_requests = get_cumulative_size_request(apache_data)
    status_404_requests = get_status_404_request(apache_data)
    weekly_uniq_hosts = get_weekly_uniq_hosts(apache_data)

    return Data(total_sent, cumulative_size_requests,
                weekly_uniq_hosts, status_404_requests)
def aggregate_info(top_dir, file_pattern):
    '''Walk the log records once and build every figure for the report.

    `top_dir` and `file_pattern` are forwarded unchanged to
    `apache_log_infos`; each record it produces is read through the keys
    'size', 'status', 'request', 'week' and 'host'.

    Returns a `Data` namedtuple with the fields, in order:
        * total_sent               - sum of int(record['size']) over all
                                     records
        * cumulative_size_requests - list of (request, total size) pairs,
                                     largest total first
        * weekly_uniq_hosts        - dict mapping each week to the set of
                                     hosts seen in that week
        * status_404_requests      - list of (request, hit count) pairs
                                     for records whose status is '404',
                                     most frequent first

    Both lists contain every request, not only the first 100; any
    trimming for the report is left to the caller.
    '''
    Data = namedtuple('Data', 'total_sent, cumulative_size_requests, \
                      weekly_uniq_hosts, status_404_requests')

    bytes_total = 0
    size_by_request = {}
    hits_404 = {}
    hosts_by_week = {}

    for record in apache_log_infos(top_dir, file_pattern):
        bytes_total += int(record['size'])

        # Tally the requests that were answered with a 404.
        if record['status'] == '404':
            missing = record['request']
            hits_404[missing] = hits_404.get(missing, 0) + 1

        # Remember each distinct host under the week it appeared in.
        hosts_by_week.setdefault(record['week'], set()).add(record['host'])

        # Accumulate the bytes served for each request.
        target = record['request']
        size_by_request[target] = (
            size_by_request.get(target, 0) + int(record['size'])
        )

    # Both rankings order the (key, value) pairs by value, descending.
    by_value = itemgetter(1)
    ranked_sizes = sorted(size_by_request.items(), key=by_value, reverse=True)
    ranked_404 = sorted(hits_404.items(), key=by_value, reverse=True)

    return Data(bytes_total, ranked_sizes, hosts_by_week, ranked_404)