def reformat(path, day, month, year, time_offset, contents, n_contents, out_file):
    """Append one day's successful GET requests to a request trace.

    Looks in ``path`` for the log file named by
    ``'%02d%02d%02d-merged.log' % (year, month, day)``.  Every line of that
    file is parsed with ``parse_line``; for each GET request with a status
    code below 300 for which ``determine_format_and_content_id`` yields a
    content ID, the line ``<time>,0,<content_id>`` is written to
    ``out_file``.  ``<time>`` is ``time_offset`` plus the request's time of
    day in seconds.

    Args:
        path: Directory that is searched for the day's merged log.
        day, month, year: Integers used to build the log file name.
        time_offset: Seconds added to every time of day written to the trace.
        contents: Mapping of content ID to weight; updated in place.  A
            content ID gets weight 2 once any entry of the module-level
            ``priority_content_types`` occurs in one of its raw log lines,
            otherwise weight 1.  A weight of 2 is never lowered again.
        n_contents: Not used by this function; returned unchanged.
        out_file: Writable text file object receiving the trace lines.

    Returns:
        Tuple ``(day_existed, contents, n_contents)``; ``day_existed`` is
        True if the day's log file was found in ``path``.
    """
    log_name = '%02d%02d%02d-merged.log' % (year, month, day)
    if log_name not in os.listdir(path):
        return False, contents, n_contents

    print('reading %s' % log_name)
    # os.listdir() returns bare names, so the file has to be opened relative
    # to ``path`` and not relative to the current working directory.
    with open(os.path.join(path, log_name), 'rt') as in_file:
        for line in in_file:
            request = parse_line(line)
            if not (request['http_request_name'] == 'GET' and request['code'] < 300):
                continue

            # Best effort: a request whose content ID cannot be determined is
            # skipped instead of aborting the whole day.
            try:
                _, content_id = determine_format_and_content_id(request)
            except Exception:
                content_id = None
            if content_id is None:
                continue

            # Keep the text between the last 'T' and the last '-' of the
            # timestamp -- presumably 'HH:MM:SS' of an ISO-8601 stamp with a
            # negative UTC offset; TODO confirm against parse_line.
            clock = request['time'].rpartition('T')[2].rpartition('-')[0]
            hours, _, remainder = clock.partition(':')
            minutes, _, seconds = remainder.partition(':')
            trace_time = (time_offset + int(hours) * 3600
                          + int(minutes) * 60 + int(seconds))

            # The receiver column is constant (single-cache scenario): 0.
            out_file.write('%d,%d,%d\n' % (trace_time, 0, content_id))

            # (Re)determine the weight only while the content is unknown or
            # still has the default weight 1.
            if contents.get(content_id, 1) == 1:
                is_priority_content = any(
                    priority_content_type in line
                    for priority_content_type in priority_content_types)
                contents[content_id] = 2 if is_priority_content else 1

    return True, contents, n_contents
def analyze(path, day, month, year, data):
    """Accumulate request statistics for one day's merged log into ``data``.

    Looks in ``path`` for the log file named by
    ``'%02d%02d%02d-merged.log' % (year, month, day)`` and parses every line
    with ``parse_line``.  ``data`` is updated in place:

    * ``data['request_names'][name]`` counts every request by HTTP method.

    For GET requests with a status code below 300 additionally:

    * ``data['zero_bytes']`` counts requests with ``body_bytes_sent == 0``.
    * ``data['ip_24_ranges']``, ``data['ip_16_ranges']`` and
      ``data['ip_8_ranges']`` count requests per IP prefix obtained by
      dropping the last one, two and three dot-separated parts of the IP.
    * ``data['body_bytes_sizes'][label]`` is a histogram over the number of
      decimal digits of ``body_bytes_sent``.
    * ``data['content_type'][fmt]`` counts requests per format reported by
      ``determine_format_and_content_id``; for cache hits
      ``data['content_type_hits'][fmt]`` is set to True and
      ``data['min_hit_size']`` tracks the smallest hit body size.

    The counter mappings are incremented with ``+= 1`` -- presumably they
    are ``defaultdict(int)`` or pre-populated; verify against the caller.

    Args:
        path: Directory that is searched for the day's merged log.
        day, month, year: Integers used to build the log file name.
        data: Statistics dictionary as described above; updated in place.

    Returns:
        The same ``data`` object, after it was passed to
        ``print_data_dict_compact``.
    """
    log_name = '%02d%02d%02d-merged.log' % (year, month, day)
    for filename in os.listdir(path):
        if filename != log_name:
            continue
        print('reading %s' % filename)
        # os.listdir() returns bare names, so the file has to be opened
        # relative to ``path`` and not relative to the working directory.
        with open(os.path.join(path, filename), 'rt') as in_file:
            for line in in_file:
                request = parse_line(line)
                data['request_names'][request['http_request_name']] += 1
                if not (request['http_request_name'] == 'GET' and request['code'] < 300):
                    continue

                if request['body_bytes_sent'] == 0:
                    data['zero_bytes'] += 1

                # Strip one trailing dot-separated part per step.
                ip_24 = request['ip'].rpartition('.')[0]
                ip_16 = ip_24.rpartition('.')[0]
                ip_8 = ip_16.rpartition('.')[0]
                data['ip_24_ranges'][ip_24] += 1
                data['ip_16_ranges'][ip_16] += 1
                data['ip_8_ranges'][ip_8] += 1

                # Count decimal digits; floor division keeps this an integer
                # loop on Python 3 as well as on Python 2.
                byte_size = request['body_bytes_sent']
                order_of_magnitude = 1
                while byte_size > 0:
                    byte_size //= 10
                    order_of_magnitude += 1
                # NOTE(review): order_of_magnitude is digit count + 1, so the
                # label reads one decade high (5 bytes -> '10^1 < b < 10^2').
                # Left unchanged because existing keys may depend on it.
                size_label = '10^%d < b < 10^%d' % (order_of_magnitude - 1,
                                                   order_of_magnitude)
                data['body_bytes_sizes'][size_label] += 1

                content_format, _ = determine_format_and_content_id(request)
                if content_format is None:
                    continue
                data['content_type'][content_format] += 1
                if request['cache_hit_or_miss'] == 'HIT':
                    data['content_type_hits'][content_format] = True
                    if request['body_bytes_sent'] < data['min_hit_size']:
                        data['min_hit_size'] = request['body_bytes_sent']

    print_data_dict_compact(data)
    return data